C   24

fields.c

Guest on 14th January 2022 06:15:32 PM

  1.  
  2. /*     
  3. $NetBSD: fields.c,v 1.33 apb Exp $      */
  4. /*- * Copyright (c)The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Ben Harris and Jaromir Dolecek. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 1993 *       The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Peter McIlroy. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* Subroutines to generate sort keys. */ #include "sort.h" __RCSID("$NetBSD: fields.c,v 1.33 2013/01/20 10:12:58 apb Exp $");
  5. #define SKIP_BLANKS(ptr) {                                      \       if (BLANK & d_mask[*(ptr)])                             \               while (BLANK & d_mask[*(++(ptr))]);             \ } #define NEXTCOL(pos) {                                              \       if (!SEP_FLAG)                                          \               while (BLANK & l_d_mask[*(++pos)]);             \       while ((*(pos+1) != '\0') && !((FLD_D | REC_D_F) & l_d_mask[*++pos]));\ }                static u_char *enterfield(u_char *, const u_char *, struct field *, int); static u_char *number(u_char *, const u_char *, u_char *, u_char *, int); static u_char *length(u_char *, const u_char *, u_char *, u_char *, int); #define DECIMAL_POINT '.' /* * constructs sort key with leading recheader, followed by the key, * followed by the original line. */ length_t enterkey(RECHEADER *keybuf, const u_char *keybuf_end, u_char *line_data, size_t line_size, struct field fieldtable[])       /* keybuf:       pointer to start of key */ {   int i;  u_char *l_d_mask;       u_char *lineend, *pos;  const u_char *endkey;   u_char *keypos;         struct coldesc *clpos;  int col = 1;    struct field *ftpos;    l_d_mask = d_mask;      pos = line_data - 1;    lineend = line_data + line_size-1;                              /* don't include rec_delimiter */       for (i = 0; i < ncols; i++) {           clpos = clist + i;              for (; (col < clpos->num) && (pos < lineend); col++) {                  NEXTCOL(pos);           }               if (pos >= lineend)                     break;          clpos->start = SEP_FLAG ? pos + 1 : pos;                NEXTCOL(pos);           clpos->end = pos;               col++;          if (pos >= lineend) {                   clpos->end = lineend;                   i++;                    break;          }       }       for (; i <= ncols; i++)                 clist[i].start = clist[i].end = lineend;        if (clist[0].start < line_data)                 clist[0].start++;       /*       * We write the sort keys (concatenated) followed by the         * original line data (for output) as the 'keybuf' data.         * keybuf->length is the number of key bytes + data bytes.       * keybuf->offset is the number of key bytes.    * We add a record separator weight after the key in case        * (as is usual) we need to preserve the order of equal lines,   * and for 'sort -u'.    * The key itself will have had the correct weight applied.      */     keypos = keybuf->data;  endkey = keybuf_end - line_size - 1;    if (endkey <= keypos)           /* No room for any key bytes */                 return 1;       for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++) {                if ((keypos = enterfield(keypos, endkey, ftpos,                  fieldtable->flags)) == NULL)                   return (1);     }       keybuf->offset = keypos - keybuf->data;         keybuf->length = keybuf->offset + line_size;    /*       * Posix requires that equal keys be further sorted by the       * entire original record.       * NetBSD has (at least for some time) kept equal keys in        * their original order.         * For 'sort -u' posix_sort is unset.    */     keybuf->keylen = posix_sort ? keybuf->length : keybuf->offset;  memcpy(keypos, line_data, line_size);   return (0); } /* * constructs a field (as defined by -k) within a key */ static u_char * enterfield(u_char *tablepos, const u_char *endkey, struct field *cur_fld, int gflags) {        u_char *start, *end, *lineend, *mask, *lweight;         struct column icol, tcol;       u_int flags;    icol = cur_fld->icol;   tcol = cur_fld->tcol;   flags = cur_fld->flags;         start = icol.p->start;  lineend = clist[ncols].end;     if (flags & BI)                 SKIP_BLANKS(start);     start += icol.indent;   start = min(start, lineend);    if (!tcol.num)          end = lineend;  else {          if (tcol.indent) {                      end = tcol.p->start;                    if (flags & BT)                                 SKIP_BLANKS(end);                       end += tcol.indent;                     end = min(end, lineend);                } else                  end = tcol.p->end;      }       if (flags & L)          return length(tablepos, endkey, start, end, flags);     if (flags & N)          return number(tablepos, endkey, start, end, flags);     /* Bound check space - assuming nothing is skipped */   if (tablepos + (end - start) + 1 >= endkey)             return NULL;    mask = cur_fld->mask;   lweight = cur_fld->weights;             for (; start < end; start++) {          if (!mask || mask[*start]) {                    *tablepos++ = lweight[*start];          }       }       /* Add extra byte (absent from lweight) to sort short keys correctly */         *tablepos++ = lweight[REC_D];   return tablepos; } /* * Numbers are converted to a floating point format (exponent & mantissa) * so that they compare correctly as sequence of unsigned bytes. * Bytes 0x00 and 0xff are used to terminate positive and negative numbers * to ensure that 0.123 sorts after 0.12 and -0.123 sorts before -0.12. * * The first byte contain the overall sign, exponent sign and some of the * exponent. These have to be ordered (-ve value, decreasing exponent), * zero, (+ve value, increasing exponent). * * The first byte is 0x80 for zero, 0xc0 for +ve with exponent 0. * -ve values are the 1's compliments (so 0x7f isn't used!). * * This only leaves 63 byte values for +ve exponents - which isn't enough. * The largest 4 exponent values are used to hold a byte count of the * number of following bytes that contain 8 exponent bits per byte, * This lets us sort exponents from -2^31 to +2^31. * * The mantissa is stored 2 digits per byte offset by 0x40, for negative * numbers the order must be reversed (they are bit inverted). * * Reverse sorts are done by inverting the sign of the number. */ #define MAX_EXP_ENC ((int)sizeof(int)) static u_char * number(u_char *pos, const u_char *bufend, u_char *line, u_char *lineend, int reverse) {     int exponent = -1;      int had_dp = 0;         u_char *tline;  char ch;        unsigned int val;       u_char *last_nz_pos;    u_char negate;  if (reverse & R)                negate = 0xff;  else            negate = 0;     /* Give ourselves space for the key terminator */       bufend--;       /* Ensure we have enough space for the exponent */      if (pos + 1 + MAX_EXP_ENC > bufend)             return (NULL);  SKIP_BLANKS(line);      if (*line == '-') {     /* set the sign */              negate ^= 0xff;                 line++;         } else if (*line == '+') {              line++;         }       /* eat initial zeroes */        for (; *line == '0' && line < lineend; line++)          continue;       /* calculate exponents */       if (*line == DECIMAL_POINT) {           /* Decimal fraction */          had_dp = 1;             while (*++line == '0' && line < lineend)                        exponent--;     } else {                /* Large (absolute) value, count digits */              for (tline = line; *tline >= '0' &&              *tline <= '9' && tline < lineend; tline++)                     exponent++;     }       /* If the first/next character isn't a digit, value is zero */  if (*line < '1' || *line > '9' || line >= lineend) {            /* This may be "0", "0.00", "000" or "fubar" but sorts as 0 */          /* XXX what about NaN, NAN, inf and INF */              *pos++ = 0x80;          return pos;     }       /* Maybe here we should allow for e+12 (etc) */         if (exponent < 0x40 - MAX_EXP_ENC && -exponent < 0x40 - MAX_EXP_ENC) {          /* Value ok for simple encoding */              /* exponent 0 is 0xc0 for +ve numbers and 0x40 for -ve ones */          exponent += 0xc0;               *pos++ = negate ^ exponent;     } else {                /* Out or range for a single byte */            int c, t;               t = exponent > 0 ? exponent : -exponent;                /* Count how many 8-bit bytes are needed */             for (c = 0; ; c++) {                    t >>= 8;                        if (t == 0)                             break;          }               /* 'c' better be 0..3 here - but probably 0..1 */               /* Offset just outside valid range */           t = c + 0x40 - MAX_EXP_ENC;             if (exponent < 0)                       t = -t;                 *pos++ = negate ^ (t + 0xc0);           /* now add each byte, most significant first */                 for (; c >= 0; c--)                     *pos++ = negate ^ (exponent >> (c * 8));        }       /* Finally add mantissa, 2 digits per byte */   for (last_nz_pos = pos; line < lineend; ) {             if (pos >= bufend)                      return NULL;            ch = *line++;           val = (ch - '0') * 10;          if (val > 90) {                         if (ch == DECIMAL_POINT && !had_dp) {                           had_dp = 1;                             continue;                       }                       break;          }               while (line < lineend) {                        ch = *line++;                   if (ch == DECIMAL_POINT && !had_dp) {                           had_dp = 1;                             continue;                       }                       if (ch < '0' || ch > '9')                               line = lineend;                         else                            val += ch - '0';                        break;          }               *pos++ = negate ^ (val + 0x40);                 if (val != 0)                   last_nz_pos = pos;      }       /* Add key terminator, deleting any trailing "00"
  6.  */     *last_nz_pos++ = negate;        return (last_nz_pos); }
  7. static u_char * length(u_char *pos, const u_char *bufend, u_char *line, u_char *lineend, int flag)
  8. {       u_char buf[32];         int l;
  9.         SKIP_BLANKS(line);
  10.         l = snprintf((char *)buf, sizeof(buf), "%td", lineend - line);  
  11. return number(pos, bufend, buf, buf + l, flag); }

Raw Paste


Login or Register to edit or fork this paste. It's free.