C   80

extract title

Guest on 27th April 2022 01:48:00 AM

  1. #include <stdio.h>
  2. #include <string.h>
  3.  
  4. typedef int BOOL;
  5. #define TRUE 1
  6. #define FALSE 0
  7.  
  8. main(argc, argv)
  9. int argc;
  10. char **argv;
  11. {
  12.     FILE *fp;
  13.     char filename[256];
  14.     char buffer[1024];
  15.     char *p;
  16.     BOOL found = FALSE;
  17.     BOOL tag = FALSE;
  18.     int lines = 0;
  19.  
  20.     if (argc != 3) {
  21.         fprintf(stderr, "\n\
  22. This program takes an HTML document and extracts to its stdout\n\
  23. the TITLE of the document, all in one line.\n\n\
  24. Usage:\n\
  25. \t%s directory filename\n\n", argv[0]);
  26.         exit(1);
  27.     }
  28.  
  29.     strcpy(filename, argv[1]);
  30.     if (*filename)
  31.       strcat(filename, "/");
  32.     strcat(filename, argv[2]);
  33.  
  34.     if (!(fp = fopen(filename, "r"))) {
  35.         fprintf(stderr, "%s: Unable to open file \"%s\"\n",
  36.                 argv[0], filename);
  37.         exit(2);
  38.     }
  39.  
  40.     while (lines++ < 20 &&      /* Scan only first 20 lines */
  41.            NULL != (p = fgets(buffer, 1024, fp))) {
  42.         if (*p)
  43.             p[strlen(p)-1] = NULL;      /* Overwrite newline */
  44.         while (p && *p) {
  45.             if (tag) {
  46.                 p = strchr(p, '>');
  47.                 if (p) {
  48.                     p++;
  49.                     tag = FALSE;
  50.                 }
  51.                 else continue;
  52.             }
  53.             if (found)
  54.                 while (*p && *p != '<') fputc(*(p++), stdout);
  55.             else
  56.                 while (*p && *p != '<') p++;
  57.             if (!*p) {
  58.                 if (found)
  59.                     fputc(' ', stdout); /* We replace newline with space */
  60.                 continue;
  61.             }
  62.             else if (!found && (!strncmp(p, "<TITLE>", 7) ||
  63.                                 !strncmp(p, "<title>", 7) ||
  64.                                 !strncmp(p, "<Title>", 7))) {
  65.                 p += 7;
  66.                 found = TRUE;
  67.             }
  68.             else if (found && (!strncmp(p, "</TITLE>", 8) ||
  69.                                !strncmp(p, "</title>", 8) ||
  70.                                !strncmp(p, "</Title>", 8))) {
  71.                 fclose(fp);
  72.                 fputc('\n', stdout);
  73.                 exit(0);
  74.             }
  75.             else tag = TRUE;
  76.         } /* while stuff in buffer */
  77.     } /* while not EOF and not very many lines read */
  78.  
  79.     /* If we come here, the title was not found among */
  80.     /* the first few lines. */
  81.     printf("%s\n", argv[2]);  /* Then using filename */
  82.     fclose(fp);
  83. }

Raw Paste


Login or Register to edit or fork this paste. It's free.