C   17

msort.c

Guest on 14th January 2022 06:36:29 PM

  1. /*      $NetBSD: msort.c,v 1.31
  2. kre Exp $       */
  3.  /*- * Copyright (c) The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Ben Harris and Jaromir Dolecek. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 1993 *     The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Peter McIlroy. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "sort.h" #include "fsort.h" __RCSID("$NetBSD: msort.c,v 1.31 2016/06/01 02:37:55 kre Exp $"); #include <stdlib.h> #include <string.h> #include <unistd.h> #include <util.h> /* Subroutines using comparisons: merge sort and check order */ #define DELETE (1) typedef struct mfile {         FILE *fp;       get_func_t get;         RECHEADER *rec;         u_char *end; } MFILE; static int cmp(RECHEADER *, RECHEADER *); static int insert(struct mfile **, struct mfile *, int, int); static void merge_sort_fstack(FILE *, put_func_t, struct field *); /* * Number of files merge() can merge in one pass. */ #define MERGE_FNUM 16 static struct mfile fstack[MERGE_FNUM]; static struct mfile fstack_1[MERGE_FNUM]; static struct mfile fstack_2[MERGE_FNUM]; static int fstack_count, fstack_1_count, fstack_2_count; void save_for_merge(FILE *fp, get_func_t get, struct field *ftbl) {  FILE *mfp, *mfp1, *mfp2;        if (fstack_count == MERGE_FNUM) {               /* Must reduce the number of temporary files */                 mfp = ftmp();           merge_sort_fstack(mfp, putrec, ftbl);           /* Save output in next layer */                 if (fstack_1_count == MERGE_FNUM) {                     mfp1 = ftmp();                  memcpy(fstack, fstack_1, sizeof fstack);                        merge_sort_fstack(mfp1, putrec, ftbl);                  if (fstack_2_count == MERGE_FNUM) {                             /* More than 4096 files! */                             mfp2 = ftmp();                 
  4.         memcpy(fstack, fstack_2, sizeof fstack);                                merge_sort_fstack(mfp2, putrec, ftbl);                          fstack_2[0].fp = mfp2;                          fstack_2_count = 1;                     }              
  5.         fstack_2[fstack_2_count].fp = mfp1;                     fstack_2[fstack_2_count].get = geteasy;                         fstack_2_count++;                       fstack_1_count = 0;             }              
  6. fstack_1[fstack_1_count].fp = mfp;              fstack_1[fstack_1_count].get = geteasy;                 fstack_1_count++;               fstack_count = 0;       }       fstack[fstack_count].fp = fp;   fstack[fstack_count++].get = get; } void fmerge(struct filelist *filelist, int nfiles, FILE *outfp, struct field *ftbl) {      
  7. get_func_t get = SINGL_FLD ? makeline : makekey;        FILE *fp;       int i;  for (i = 0; i < nfiles; i++) {          fp = fopen(filelist->names[i], "r");            if (fp == NULL)                         err(2, "%s", filelist->names[i]);               save_for_merge(fp, get, ftbl);  }       merge_sort(outfp, putline, ftbl); } void merge_sort(FILE *outfp, put_func_t put, struct field *ftbl) {  i
  8. nt count = fstack_1_count + fstack_2_count;     FILE *mfp;      int i;  if (count == 0) {               /* All files in initial array */                merge_sort_fstack(outfp, put, ftbl);            return;         }       count += fstack_count;  /* Too many files for one merge sort */         for (;;) {              /* Sort latest 16 files */              i = count;              if (i > MERGE_FNUM)                     i = MERGE_FNUM;                 while (fstack_count > 0)                        fstack[--i] = fstack[--fstack_count];          
  9. while (i > 0 && fstack_1_count > 0)                     fstack[--i] = fstack_1[--fstack_1_count];               while (i > 0)                   fstack[--i] = fstack_2[--fstack_2_count];               if (count <= MERGE_FNUM) {                      /* Got all the data */                  fstack_count = count;                   merge_sort_fstack(outfp, put, ftbl);                    return;                 }               mfp = ftmp();           fstack_count = count > MERGE_FNUM ? MERGE_FNUM : count;                 merge_sort_fstack(mfp, putrec, ftbl);           fstack[0].fp = mfp;             fstack[0].get = geteasy;                fstack_count = 1;               count -= MERGE_FNUM - 1;        } }
  10.  static void merge_sort_fstack(FILE *outfp, put_func_t put, struct field *ftbl) {       struct mfile *flistb[MERGE_FNUM], **flist = flistb, *cfile;     RECHEADER *new_rec;     u_char *new_end;        void *tmp;      int c, i, nfiles;       size_t sz;      /* Read one record from each file (read again if a duplicate) */        for (nfiles = i = 0; i < fstack_count; i++) {           cfile = &fstack[i];             if (cfile->rec == NULL) {                       cfile->rec = allocrec(NULL, DEFLLEN);                   cfile->end = (u_char *)cfile->rec + DEFLLEN;            }               rewind(cfile->fp);              for (;;) {                      c = cfile->get(cfile->fp, cfile->rec, cfile->end, ftbl);                        if (c == EOF)                           break;                  if (c == BUFFEND) {                             /* Double buffer size */                                sz = (cfile->end - (u_char *)cfile->rec) * 2;                           cfile->rec = allocrec(cfile->rec, sz);                          cfile->end = (u_char *)cfile->rec + sz;                                 continue;                       }                       if (nfiles != 0) {                              if (insert(flist, cfile, nfiles, !DELETE))                                      /* Duplicate removed */                                         continue;                      
  11. } else                          flist[0] = cfile;                       nfiles++;                       break;          }       }       if (nfiles == 0)                return;         /*       * We now loop reading a new record from the file with the       * 'sorted first' existing record.       * As each record is added, the 'first' record is written to the         * output file - maintaining one record from each file in the sorted     * list.         */     new_rec = allocrec(NULL, DEFLLEN);      new_end = (u_char *)new_rec + DEFLLEN;  for (;;) {              cfile = flist[0];               c = cfile->get(cfile->fp, new_rec, new_end, ftbl);              if (c == EOF) {                         /* Write out last record from now-empty input */                        put(cfile->rec, outfp);                         if (--nfiles == 0)                              break;                  /* Replace from file with now-first sorted record. */                   /* (Moving base 'flist' saves copying everything!) */                   flist++;                        continue;               }               if (c == BUFFEND) {                     /* Buffer not large enough - double in size */                  sz = (new_end - (u_char *)new_rec) * 2;                         new_rec = allocrec(new_rec, sz);                        new_end = (u_char *)new_rec +sz;                        continue;               }               /* Swap in new buffer, saving old */            tmp = cfile->rec;               cfile->rec = new_rec;           new_rec = tmp;          tmp = cfile->end;               cfile->end = new_end;           new_end = tmp;          /* Add into sort, removing the original first entry */          c = insert(flist, cfile, nfiles, DELETE);               if (c != 0 || (UNIQUE && cfile == flist[0]                       && cmp(new_rec, cfile->rec) == 0)) {                   /* Was an unwanted duplicate, restore buffer */                         tmp = cfile->rec;                       cfile->rec = new_rec;                   new_rec = tmp;                  tmp = cfile->end;                       cfile->end = new_end;                   new_end = tmp;                  continue;               }               /* Write out 'old' record */            put(new_rec, outfp);    }       free(new_rec); } /* * if delete: inserts rec in flist, deletes flist[0]; * otherwise just inserts *rec in flist. * Returns 1 if record is a duplicate to be ignored. */ static int insert(struct mfile **flist, struct mfile *rec, int ttop, int delete) {      int mid, top = ttop, bot = 0, cmpv = 1;         for (mid = top / 2; bot + 1 != top; mid = (bot + top) / 2) {            cmpv = cmp(rec->rec, flist[mid]->rec);          if (cmpv == 0 ) {                       if (UNIQUE)                             /* Duplicate key, read another record */                                /* NB: This doesn't guarantee to keep any                                * particular record. */                                return 1;                       /*                       * Apply sort by input file order.                       * We could truncate the sort is the fileno are                          * adjacent - but that is all too hard!                          * The fileno cannot be equal, since we only have one                    * record from each file (+ flist[0] which never                         * comes here).                          */                     cmpv = rec < flist[mid] ? -1 : 1;                       if (REVERSE)                            cmpv = -cmpv;           }               if (cmpv < 0)                   top = mid;              else                    bot = mid;      }       /* At this point we haven't yet compared against flist[0] */    if (delete) {           /* flist[0] is ourselves, only the caller knows the old data */                 if (bot != 0) {                         memmove(flist, flist + 1, bot * sizeof(MFILE *));                       flist[bot] = rec;               }               return 0;       }       /* Inserting original set of records */         if (bot == 0 && cmpv != 0) {            /* Doesn't match flist[1], must compare with flist[0] */                cmpv = cmp(rec->rec, flist[0]->rec);            if (cmpv == 0 && UNIQUE)                        return 1;               /* Add matching keys in file order (ie new is later) */                 if (cmpv < 0)                   bot = -1;       }       bot++;  memmove(flist + bot + 1, flist + bot, (ttop - bot) * sizeof(MFILE *));  flist[bot] = rec;       return 0; } /* * check order on one file */ void order(struct filelist *filelist, struct field *ftbl, int quiet) {      get_func_t get = SINGL_FLD ? makeline : makekey;        RECHEADER *crec, *prec, *trec;  u_char *crec_end, *prec_end, *trec_end;         FILE *fp;       int c;  fp = fopen(filelist->names[0], "r");    if (fp == NULL)                 err(2, "%s", filelist->names[0]);       crec = malloc(offsetof(RECHEADER, data[DEFLLEN]));      crec_end = crec->data + DEFLLEN;        prec = malloc(offsetof(RECHEADER, data[DEFLLEN]));      prec_end = prec->data + DEFLLEN;        /* XXX this does exit(0) for overlong lines */  if (get(fp, prec, prec_end, ftbl) != 0)                 exit(0);        while (get(fp, crec, crec_end, ftbl) == 0) {            if (0 < (c = cmp(prec, crec))) {                        if (quiet)                              exit(1);                        crec->data[crec->length-1] = 0;                         errx(1, "found disorder: %s", crec->data+crec->offset);                 }               if (UNIQUE && !c) {                     if (quiet)                              exit(1);                        crec->data[crec->length-1] = 0;                         errx(1, "found non-uniqueness: %s",                      crec->data+crec->offset);              }               /*               * Swap pointers so that this record is on place pointed                 * to by prec and new record is read to place pointed to by              * crec.                 */             trec = prec;            prec = crec;            crec = trec;            trec_end = prec_end;            prec_end = crec_end;            crec_end = trec_end;    }       exit(0); } static int cmp(RECHEADER *rec1, RECHEADER *rec2) {   int len;        int r;  /* key is weights */    len = min(rec1->keylen, rec2->keylen);  r = memcmp(rec1->data, rec2->data, len);        if (r == 0)             r = rec1->keylen - rec2->keylen;        if (REVERSE)           
  12. r = -r;         return r; }

Raw Paste


Login or Register to edit or fork this paste. It's free.