PYTHON   49

ast.py

Guest on 2nd August 2021 03:28:57 PM

  1. #!/usr/bin/env python
  2. #
  3. # Copyright  Neal Norwitz
  4. # Portions Copyright Google Inc.
  5. #
  6. # Licensed under the Apache License, Version 2.0 (the "License");
  7. # you may not use this file except in compliance with the License.
  8. # You may obtain a copy of the License at
  9. #
  10. #      http://www.apache.org/licenses/LICENSE-2.0
  11. #
  12. # Unless required by applicable law or agreed to in writing, software
  13. # distributed under the License is distributed on an "AS IS" BASIS,
  14. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. # See the License for the specific language governing permissions and
  16. # limitations under the License.
  17.  
  18. """Generate an Abstract Syntax Tree (AST) for C++."""
  19.  
  20. __author__ = 'nnorwitz@google.com (Neal Norwitz)'
  21.  
  22.  
  23. # TODO:
  24. #  * Tokens should never be exported, need to convert to Nodes
  25. #    (return types, parameters, etc.)
  26. #  * Handle static class data for templatized classes
  27. #  * Handle casts (both C++ and C-style)
  28. #  * Handle conditions and loops (if/else, switch, for, while/do)
  29. #
  30. # TODO much, much later:
  31. #  * Handle #define
  32. #  * exceptions
  33.  
  34.  
  35. try:
  36.     # Python 3.x
  37.     import builtins
  38. except ImportError:
  39.     # Python 2.x
  40.     import __builtin__ as builtins
  41.  
  42. import sys
  43. import traceback
  44.  
  45. from cpp import keywords
  46. from cpp import tokenize
  47. from cpp import utils
  48.  
  49.  
  50. if not hasattr(builtins, 'reversed'):
  51.     # Support Python 2.3 and earlier.
  52.     def reversed(seq):
  53.         for i in range(len(seq)-1, -1, -1):
  54.             yield seq[i]
  55.  
  56. if not hasattr(builtins, 'next'):
  57.     # Support Python 2.5 and earlier.
  58.     def next(obj):
  59.         return obj.next()
  60.  
  61.  
  62. VISIBILITY_PUBLIC, VISIBILITY_PROTECTED, VISIBILITY_PRIVATE = range(3)
  63.  
  64. FUNCTION_NONE = 0x00
  65. FUNCTION_CONST = 0x01
  66. FUNCTION_VIRTUAL = 0x02
  67. FUNCTION_PURE_VIRTUAL = 0x04
  68. FUNCTION_CTOR = 0x08
  69. FUNCTION_DTOR = 0x10
  70. FUNCTION_ATTRIBUTE = 0x20
  71. FUNCTION_UNKNOWN_ANNOTATION = 0x40
  72. FUNCTION_THROW = 0x80
  73. FUNCTION_OVERRIDE = 0x100
  74.  
  75. """
  76. These are currently unused.  Should really handle these properly at some point.
  77.  
  78. TYPE_MODIFIER_INLINE   = 0x010000
  79. TYPE_MODIFIER_EXTERN   = 0x020000
  80. TYPE_MODIFIER_STATIC   = 0x040000
  81. TYPE_MODIFIER_CONST    = 0x080000
  82. TYPE_MODIFIER_REGISTER = 0x100000
  83. TYPE_MODIFIER_VOLATILE = 0x200000
  84. TYPE_MODIFIER_MUTABLE  = 0x400000
  85.  
  86. TYPE_MODIFIER_MAP = {
  87.    'inline': TYPE_MODIFIER_INLINE,
  88.    'extern': TYPE_MODIFIER_EXTERN,
  89.    'static': TYPE_MODIFIER_STATIC,
  90.    'const': TYPE_MODIFIER_CONST,
  91.    'register': TYPE_MODIFIER_REGISTER,
  92.    'volatile': TYPE_MODIFIER_VOLATILE,
  93.    'mutable': TYPE_MODIFIER_MUTABLE,
  94.    }
  95. """
  96.  
  97. _INTERNAL_TOKEN = 'internal'
  98. _NAMESPACE_POP = 'ns-pop'
  99.  
  100.  
  101. # TODO(nnorwitz): use this as a singleton for templated_types, etc
  102. # where we don't want to create a new empty dict each time.  It is also const.
  103. class _NullDict(object):
  104.     __contains__ = lambda self: False
  105.     keys = values = items = iterkeys = itervalues = iteritems = lambda self: ()
  106.  
  107.  
  108. # TODO(nnorwitz): move AST nodes into a separate module.
  109. class Node(object):
  110.     """Base AST node."""
  111.  
  112.     def __init__(self, start, end):
  113.         self.start = start
  114.         self.end = end
  115.  
  116.     def IsDeclaration(self):
  117.         """Returns bool if this node is a declaration."""
  118.         return False
  119.  
  120.     def IsDefinition(self):
  121.         """Returns bool if this node is a definition."""
  122.         return False
  123.  
  124.     def IsExportable(self):
  125.         """Returns bool if this node exportable from a header file."""
  126.         return False
  127.  
  128.     def Requires(self, node):
  129.         """Does this AST node require the definition of the node passed in?"""
  130.         return False
  131.  
  132.     def XXX__str__(self):
  133.         return self._StringHelper(self.__class__.__name__, '')
  134.  
  135.     def _StringHelper(self, name, suffix):
  136.         if not utils.DEBUG:
  137.             return '%s(%s)' % (name, suffix)
  138.         return '%s(%d, %d, %s)' % (name, self.start, self.end, suffix)
  139.  
  140.     def __repr__(self):
  141.         return str(self)
  142.  
  143.  
  144. class Define(Node):
  145.     def __init__(self, start, end, name, definition):
  146.         Node.__init__(self, start, end)
  147.         self.name = name
  148.         self.definition = definition
  149.  
  150.     def __str__(self):
  151.         value = '%s %s' % (self.name, self.definition)
  152.         return self._StringHelper(self.__class__.__name__, value)
  153.  
  154.  
  155. class Include(Node):
  156.     def __init__(self, start, end, filename, system):
  157.         Node.__init__(self, start, end)
  158.         self.filename = filename
  159.         self.system = system
  160.  
  161.     def __str__(self):
  162.         fmt = '"%s"'
  163.         if self.system:
  164.             fmt = '<%s>'
  165.         return self._StringHelper(self.__class__.__name__, fmt % self.filename)
  166.  
  167.  
  168. class Goto(Node):
  169.     def __init__(self, start, end, label):
  170.         Node.__init__(self, start, end)
  171.         self.label = label
  172.  
  173.     def __str__(self):
  174.         return self._StringHelper(self.__class__.__name__, str(self.label))
  175.  
  176.  
  177. class Expr(Node):
  178.     def __init__(self, start, end, expr):
  179.         Node.__init__(self, start, end)
  180.         self.expr = expr
  181.  
  182.     def Requires(self, node):
  183.         # TODO(nnorwitz): impl.
  184.         return False
  185.  
  186.     def __str__(self):
  187.         return self._StringHelper(self.__class__.__name__, str(self.expr))
  188.  
  189.  
  190. class Return(Expr):
  191.     pass
  192.  
  193.  
  194. class Delete(Expr):
  195.     pass
  196.  
  197.  
  198. class Friend(Expr):
  199.     def __init__(self, start, end, expr, namespace):
  200.         Expr.__init__(self, start, end, expr)
  201.         self.namespace = namespace[:]
  202.  
  203.  
  204. class Using(Node):
  205.     def __init__(self, start, end, names):
  206.         Node.__init__(self, start, end)
  207.         self.names = names
  208.  
  209.     def __str__(self):
  210.         return self._StringHelper(self.__class__.__name__, str(self.names))
  211.  
  212.  
  213. class Parameter(Node):
  214.     def __init__(self, start, end, name, parameter_type, default):
  215.         Node.__init__(self, start, end)
  216.         self.name = name
  217.         self.type = parameter_type
  218.         self.default = default
  219.  
  220.     def Requires(self, node):
  221.         # TODO(nnorwitz): handle namespaces, etc.
  222.         return self.type.name == node.name
  223.  
  224.     def __str__(self):
  225.         name = str(self.type)
  226.         suffix = '%s %s' % (name, self.name)
  227.         if self.default:
  228.             suffix += ' = ' + ''.join([d.name for d in self.default])
  229.         return self._StringHelper(self.__class__.__name__, suffix)
  230.  
  231.  
  232. class _GenericDeclaration(Node):
  233.     def __init__(self, start, end, name, namespace):
  234.         Node.__init__(self, start, end)
  235.         self.name = name
  236.         self.namespace = namespace[:]
  237.  
  238.     def FullName(self):
  239.         prefix = ''
  240.         if self.namespace and self.namespace[-1]:
  241.             prefix = '::'.join(self.namespace) + '::'
  242.         return prefix + self.name
  243.  
  244.     def _TypeStringHelper(self, suffix):
  245.         if self.namespace:
  246.             names = [n or '<anonymous>' for n in self.namespace]
  247.             suffix += ' in ' + '::'.join(names)
  248.         return self._StringHelper(self.__class__.__name__, suffix)
  249.  
  250.  
  251. # TODO(nnorwitz): merge with Parameter in some way?
  252. class VariableDeclaration(_GenericDeclaration):
  253.     def __init__(self, start, end, name, var_type, initial_value, namespace):
  254.         _GenericDeclaration.__init__(self, start, end, name, namespace)
  255.         self.type = var_type
  256.         self.initial_value = initial_value
  257.  
  258.     def Requires(self, node):
  259.         # TODO(nnorwitz): handle namespaces, etc.
  260.         return self.type.name == node.name
  261.  
  262.     def ToString(self):
  263.         """Return a string that tries to reconstitute the variable decl."""
  264.         suffix = '%s %s' % (self.type, self.name)
  265.         if self.initial_value:
  266.             suffix += ' = ' + self.initial_value
  267.         return suffix
  268.  
  269.     def __str__(self):
  270.         return self._StringHelper(self.__class__.__name__, self.ToString())
  271.  
  272.  
  273. class Typedef(_GenericDeclaration):
  274.     def __init__(self, start, end, name, alias, namespace):
  275.         _GenericDeclaration.__init__(self, start, end, name, namespace)
  276.         self.alias = alias
  277.  
  278.     def IsDefinition(self):
  279.         return True
  280.  
  281.     def IsExportable(self):
  282.         return True
  283.  
  284.     def Requires(self, node):
  285.         # TODO(nnorwitz): handle namespaces, etc.
  286.         name = node.name
  287.         for token in self.alias:
  288.             if token is not None and name == token.name:
  289.                 return True
  290.         return False
  291.  
  292.     def __str__(self):
  293.         suffix = '%s, %s' % (self.name, self.alias)
  294.         return self._TypeStringHelper(suffix)
  295.  
  296.  
  297. class _NestedType(_GenericDeclaration):
  298.     def __init__(self, start, end, name, fields, namespace):
  299.         _GenericDeclaration.__init__(self, start, end, name, namespace)
  300.         self.fields = fields
  301.  
  302.     def IsDefinition(self):
  303.         return True
  304.  
  305.     def IsExportable(self):
  306.         return True
  307.  
  308.     def __str__(self):
  309.         suffix = '%s, {%s}' % (self.name, self.fields)
  310.         return self._TypeStringHelper(suffix)
  311.  
  312.  
  313. class Union(_NestedType):
  314.     pass
  315.  
  316.  
  317. class Enum(_NestedType):
  318.     pass
  319.  
  320.  
  321. class Class(_GenericDeclaration):
  322.     def __init__(self, start, end, name, bases, templated_types, body, namespace):
  323.         _GenericDeclaration.__init__(self, start, end, name, namespace)
  324.         self.bases = bases
  325.         self.body = body
  326.         self.templated_types = templated_types
  327.  
  328.     def IsDeclaration(self):
  329.         return self.bases is None and self.body is None
  330.  
  331.     def IsDefinition(self):
  332.         return not self.IsDeclaration()
  333.  
  334.     def IsExportable(self):
  335.         return not self.IsDeclaration()
  336.  
  337.     def Requires(self, node):
  338.         # TODO(nnorwitz): handle namespaces, etc.
  339.         if self.bases:
  340.             for token_list in self.bases:
  341.                 # TODO(nnorwitz): bases are tokens, do name comparison.
  342.                 for token in token_list:
  343.                     if token.name == node.name:
  344.                         return True
  345.         # TODO(nnorwitz): search in body too.
  346.         return False
  347.  
  348.     def __str__(self):
  349.         name = self.name
  350.         if self.templated_types:
  351.             name += '<%s>' % self.templated_types
  352.         suffix = '%s, %s, %s' % (name, self.bases, self.body)
  353.         return self._TypeStringHelper(suffix)
  354.  
  355.  
  356. class Struct(Class):
  357.     pass
  358.  
  359.  
  360. class Function(_GenericDeclaration):
  361.     def __init__(self, start, end, name, return_type, parameters,
  362.                  modifiers, templated_types, body, namespace):
  363.         _GenericDeclaration.__init__(self, start, end, name, namespace)
  364.         converter = TypeConverter(namespace)
  365.         self.return_type = converter.CreateReturnType(return_type)
  366.         self.parameters = converter.ToParameters(parameters)
  367.         self.modifiers = modifiers
  368.         self.body = body
  369.         self.templated_types = templated_types
  370.  
  371.     def IsDeclaration(self):
  372.         return self.body is None
  373.  
  374.     def IsDefinition(self):
  375.         return self.body is not None
  376.  
  377.     def IsExportable(self):
  378.         if self.return_type and 'static' in self.return_type.modifiers:
  379.             return False
  380.         return None not in self.namespace
  381.  
  382.     def Requires(self, node):
  383.         if self.parameters:
  384.             # TODO(nnorwitz): parameters are tokens, do name comparison.
  385.             for p in self.parameters:
  386.                 if p.name == node.name:
  387.                     return True
  388.         # TODO(nnorwitz): search in body too.
  389.         return False
  390.  
  391.     def __str__(self):
  392.         # TODO(nnorwitz): add templated_types.
  393.         suffix = ('%s %s(%s), 0x%02x, %s' %
  394.                   (self.return_type, self.name, self.parameters,
  395.                    self.modifiers, self.body))
  396.         return self._TypeStringHelper(suffix)
  397.  
  398.  
  399. class Method(Function):
  400.     def __init__(self, start, end, name, in_class, return_type, parameters,
  401.                  modifiers, templated_types, body, namespace):
  402.         Function.__init__(self, start, end, name, return_type, parameters,
  403.                           modifiers, templated_types, body, namespace)
  404.         # TODO(nnorwitz): in_class could also be a namespace which can
  405.         # mess up finding functions properly.
  406.         self.in_class = in_class
  407.  
  408.  
  409. class Type(_GenericDeclaration):
  410.     """Type used for any variable (eg class, primitive, struct, etc)."""
  411.  
  412.     def __init__(self, start, end, name, templated_types, modifiers,
  413.                  reference, pointer, array):
  414.         """
  415.        Args:
  416.          name: str name of main type
  417.          templated_types: [Class (Type?)] template type info between <>
  418.          modifiers: [str] type modifiers (keywords) eg, const, mutable, etc.
  419.          reference, pointer, array: bools
  420.        """
  421.         _GenericDeclaration.__init__(self, start, end, name, [])
  422.         self.templated_types = templated_types
  423.         if not name and modifiers:
  424.             self.name = modifiers.pop()
  425.         self.modifiers = modifiers
  426.         self.reference = reference
  427.         self.pointer = pointer
  428.         self.array = array
  429.  
  430.     def __str__(self):
  431.         prefix = ''
  432.         if self.modifiers:
  433.             prefix = ' '.join(self.modifiers) + ' '
  434.         name = str(self.name)
  435.         if self.templated_types:
  436.             name += '<%s>' % self.templated_types
  437.         suffix = prefix + name
  438.         if self.reference:
  439.             suffix += '&'
  440.         if self.pointer:
  441.             suffix += '*'
  442.         if self.array:
  443.             suffix += '[]'
  444.         return self._TypeStringHelper(suffix)
  445.  
  446.     # By definition, Is* are always False.  A Type can only exist in
  447.     # some sort of variable declaration, parameter, or return value.
  448.     def IsDeclaration(self):
  449.         return False
  450.  
  451.     def IsDefinition(self):
  452.         return False
  453.  
  454.     def IsExportable(self):
  455.         return False
  456.  
  457.  
  458. class TypeConverter(object):
  459.  
  460.     def __init__(self, namespace_stack):
  461.         self.namespace_stack = namespace_stack
  462.  
  463.     def _GetTemplateEnd(self, tokens, start):
  464.         count = 1
  465.         end = start
  466.         while 1:
  467.             token = tokens[end]
  468.             end += 1
  469.             if token.name == '<':
  470.                 count += 1
  471.             elif token.name == '>':
  472.                 count -= 1
  473.                 if count == 0:
  474.                     break
  475.         return tokens[start:end-1], end
  476.  
  477.     def ToType(self, tokens):
  478.         """Convert [Token,...] to [Class(...), ] useful for base classes.
  479.        For example, code like class Foo : public Bar<x, y> { ... };
  480.        the "Bar<x, y>" portion gets converted to an AST.
  481.  
  482.        Returns:
  483.          [Class(...), ...]
  484.        """
  485.         result = []
  486.         name_tokens = []
  487.         reference = pointer = array = False
  488.  
  489.         def AddType(templated_types):
  490.             # Partition tokens into name and modifier tokens.
  491.             names = []
  492.             modifiers = []
  493.             for t in name_tokens:
  494.                 if keywords.IsKeyword(t.name):
  495.                     modifiers.append(t.name)
  496.                 else:
  497.                     names.append(t.name)
  498.             name = ''.join(names)
  499.             if name_tokens:
  500.                 result.append(Type(name_tokens[0].start, name_tokens[-1].end,
  501.                                    name, templated_types, modifiers,
  502.                                    reference, pointer, array))
  503.             del name_tokens[:]
  504.  
  505.         i = 0
  506.         end = len(tokens)
  507.         while i < end:
  508.             token = tokens[i]
  509.             if token.name == '<':
  510.                 new_tokens, new_end = self._GetTemplateEnd(tokens, i+1)
  511.                 AddType(self.ToType(new_tokens))
  512.                 # If there is a comma after the template, we need to consume
  513.                 # that here otherwise it becomes part of the name.
  514.                 i = new_end
  515.                 reference = pointer = array = False
  516.             elif token.name == ',':
  517.                 AddType([])
  518.                 reference = pointer = array = False
  519.             elif token.name == '*':
  520.                 pointer = True
  521.             elif token.name == '&':
  522.                 reference = True
  523.             elif token.name == '[':
  524.                pointer = True
  525.             elif token.name == ']':
  526.                 pass
  527.             else:
  528.                 name_tokens.append(token)
  529.             i += 1
  530.  
  531.         if name_tokens:
  532.             # No '<' in the tokens, just a simple name and no template.
  533.             AddType([])
  534.         return result
  535.  
  536.     def DeclarationToParts(self, parts, needs_name_removed):
  537.         name = None
  538.         default = []
  539.         if needs_name_removed:
  540.             # Handle default (initial) values properly.
  541.             for i, t in enumerate(parts):
  542.                 if t.name == '=':
  543.                     default = parts[i+1:]
  544.                     name = parts[i-1].name
  545.                     if name == ']' and parts[i-2].name == '[':
  546.                         name = parts[i-3].name
  547.                         i -= 1
  548.                     parts = parts[:i-1]
  549.                     break
  550.             else:
  551.                 if parts[-1].token_type == tokenize.NAME:
  552.                     name = parts.pop().name
  553.                 else:
  554.                     # TODO(nnorwitz): this is a hack that happens for code like
  555.                     # Register(Foo<T>); where it thinks this is a function call
  556.                     # but it's actually a declaration.
  557.                     name = '???'
  558.         modifiers = []
  559.         type_name = []
  560.         other_tokens = []
  561.         templated_types = []
  562.         i = 0
  563.         end = len(parts)
  564.         while i < end:
  565.             p = parts[i]
  566.             if keywords.IsKeyword(p.name):
  567.                 modifiers.append(p.name)
  568.             elif p.name == '<':
  569.                 templated_tokens, new_end = self._GetTemplateEnd(parts, i+1)
  570.                 templated_types = self.ToType(templated_tokens)
  571.                 i = new_end - 1
  572.                 # Don't add a spurious :: to data members being initialized.
  573.                 next_index = i + 1
  574.                 if next_index < end and parts[next_index].name == '::':
  575.                     i += 1
  576.             elif p.name in ('[', ']', '='):
  577.                 # These are handled elsewhere.
  578.                 other_tokens.append(p)
  579.             elif p.name not in ('*', '&', '>'):
  580.                 # Ensure that names have a space between them.
  581.                 if (type_name and type_name[-1].token_type == tokenize.NAME and
  582.                     p.token_type == tokenize.NAME):
  583.                     type_name.append(tokenize.Token(tokenize.SYNTAX, ' ', 0, 0))
  584.                 type_name.append(p)
  585.             else:
  586.                 other_tokens.append(p)
  587.             i += 1
  588.         type_name = ''.join([t.name for t in type_name])
  589.         return name, type_name, templated_types, modifiers, default, other_tokens
  590.  
  591.     def ToParameters(self, tokens):
  592.         if not tokens:
  593.             return []
  594.  
  595.         result = []
  596.         name = type_name = ''
  597.         type_modifiers = []
  598.         pointer = reference = array = False
  599.         first_token = None
  600.         default = []
  601.  
  602.         def AddParameter(end):
  603.             if default:
  604.                 del default[0]  # Remove flag.
  605.             parts = self.DeclarationToParts(type_modifiers, True)
  606.             (name, type_name, templated_types, modifiers,
  607.              unused_default, unused_other_tokens) = parts
  608.             parameter_type = Type(first_token.start, first_token.end,
  609.                                   type_name, templated_types, modifiers,
  610.                                   reference, pointer, array)
  611.             p = Parameter(first_token.start, end, name,
  612.                           parameter_type, default)
  613.             result.append(p)
  614.  
  615.         template_count = 0
  616.         for s in tokens:
  617.             if not first_token:
  618.                 first_token = s
  619.             if s.name == '<':
  620.                 template_count += 1
  621.             elif s.name == '>':
  622.                 template_count -= 1
  623.             if template_count > 0:
  624.                 type_modifiers.append(s)
  625.                 continue
  626.  
  627.             if s.name == ',':
  628.                 AddParameter(s.start)
  629.                 name = type_name = ''
  630.                 type_modifiers = []
  631.                 pointer = reference = array = False
  632.                 first_token = None
  633.                 default = []
  634.             elif s.name == '*':
  635.                 pointer = True
  636.             elif s.name == '&':
  637.                 reference = True
  638.             elif s.name == '[':
  639.                 array = True
  640.             elif s.name == ']':
  641.                 pass  # Just don't add to type_modifiers.
  642.             elif s.name == '=':
  643.                 # Got a default value.  Add any value (None) as a flag.
  644.                 default.append(None)
  645.             elif default:
  646.                 default.append(s)
  647.             else:
  648.                 type_modifiers.append(s)
  649.         AddParameter(tokens[-1].end)
  650.         return result
  651.  
  652.     def CreateReturnType(self, return_type_seq):
  653.         if not return_type_seq:
  654.             return None
  655.         start = return_type_seq[0].start
  656.         end = return_type_seq[-1].end
  657.         _, name, templated_types, modifiers, default, other_tokens = \
  658.            self.DeclarationToParts(return_type_seq, False)
  659.         names = [n.name for n in other_tokens]
  660.         reference = '&' in names
  661.         pointer = '*' in names
  662.         array = '[' in names
  663.         return Type(start, end, name, templated_types, modifiers,
  664.                     reference, pointer, array)
  665.  
  666.     def GetTemplateIndices(self, names):
  667.         # names is a list of strings.
  668.         start = names.index('<')
  669.         end = len(names) - 1
  670.         while end > 0:
  671.             if names[end] == '>':
  672.                 break
  673.             end -= 1
  674.         return start, end+1
  675.  
  676. class AstBuilder(object):
  677.     def __init__(self, token_stream, filename, in_class='', visibility=None,
  678.                  namespace_stack=[]):
  679.         self.tokens = token_stream
  680.         self.filename = filename
  681.         # TODO(nnorwitz): use a better data structure (deque) for the queue.
  682.         # Switching directions of the "queue" improved perf by about 25%.
  683.         # Using a deque should be even better since we access from both sides.
  684.         self.token_queue = []
  685.         self.namespace_stack = namespace_stack[:]
  686.         self.in_class = in_class
  687.         if in_class is None:
  688.             self.in_class_name_only = None
  689.         else:
  690.             self.in_class_name_only = in_class.split('::')[-1]
  691.         self.visibility = visibility
  692.         self.in_function = False
  693.         self.current_token = None
  694.         # Keep the state whether we are currently handling a typedef or not.
  695.         self._handling_typedef = False
  696.  
  697.         self.converter = TypeConverter(self.namespace_stack)
  698.  
  699.     def HandleError(self, msg, token):
  700.         printable_queue = list(reversed(self.token_queue[-20:]))
  701.         sys.stderr.write('Got %s in %s @ %s %s\n' %
  702.                          (msg, self.filename, token, printable_queue))
  703.  
  704.     def Generate(self):
  705.         while 1:
  706.             token = self._GetNextToken()
  707.             if not token:
  708.                 break
  709.  
  710.             # Get the next token.
  711.             self.current_token = token
  712.  
  713.             # Dispatch on the next token type.
  714.             if token.token_type == _INTERNAL_TOKEN:
  715.                 if token.name == _NAMESPACE_POP:
  716.                     self.namespace_stack.pop()
  717.                 continue
  718.  
  719.             try:
  720.                 result = self._GenerateOne(token)
  721.                 if result is not None:
  722.                     yield result
  723.             except:
  724.                 self.HandleError('exception', token)
  725.                 raise
  726.  
  727.     def _CreateVariable(self, pos_token, name, type_name, type_modifiers,
  728.                         ref_pointer_name_seq, templated_types, value=None):
  729.         reference = '&' in ref_pointer_name_seq
  730.         pointer = '*' in ref_pointer_name_seq
  731.         array = '[' in ref_pointer_name_seq
  732.         var_type = Type(pos_token.start, pos_token.end, type_name,
  733.                         templated_types, type_modifiers,
  734.                         reference, pointer, array)
  735.         return VariableDeclaration(pos_token.start, pos_token.end,
  736.                                    name, var_type, value, self.namespace_stack)
  737.  
  738.     def _GenerateOne(self, token):
  739.         if token.token_type == tokenize.NAME:
  740.             if (keywords.IsKeyword(token.name) and
  741.                 not keywords.IsBuiltinType(token.name)):
  742.                 method = getattr(self, 'handle_' + token.name)
  743.                 return method()
  744.             elif token.name == self.in_class_name_only:
  745.                 # The token name is the same as the class, must be a ctor if
  746.                 # there is a paren.  Otherwise, it's the return type.
  747.                 # Peek ahead to get the next token to figure out which.
  748.                 next = self._GetNextToken()
  749.                 self._AddBackToken(next)
  750.                 if next.token_type == tokenize.SYNTAX and next.name == '(':
  751.                     return self._GetMethod([token], FUNCTION_CTOR, None, True)
  752.                 # Fall through--handle like any other method.
  753.  
  754.             # Handle data or function declaration/definition.
  755.             syntax = tokenize.SYNTAX
  756.             temp_tokens, last_token = \
  757.                 self._GetVarTokensUpTo(syntax, '(', ';', '{', '[')
  758.             temp_tokens.insert(0, token)
  759.             if last_token.name == '(':
  760.                 # If there is an assignment before the paren,
  761.                 # this is an expression, not a method.
  762.                 expr = bool([e for e in temp_tokens if e.name == '='])
  763.                 if expr:
  764.                     new_temp = self._GetTokensUpTo(tokenize.SYNTAX, ';')
  765.                     temp_tokens.append(last_token)
  766.                     temp_tokens.extend(new_temp)
  767.                     last_token = tokenize.Token(tokenize.SYNTAX, ';', 0, 0)
  768.  
  769.             if last_token.name == '[':
  770.                 # Handle array, this isn't a method, unless it's an operator.
  771.                 # TODO(nnorwitz): keep the size somewhere.
  772.                 # unused_size = self._GetTokensUpTo(tokenize.SYNTAX, ']')
  773.                 temp_tokens.append(last_token)
  774.                 if temp_tokens[-2].name == 'operator':
  775.                     temp_tokens.append(self._GetNextToken())
  776.                 else:
  777.                     temp_tokens2, last_token = \
  778.                         self._GetVarTokensUpTo(tokenize.SYNTAX, ';')
  779.                     temp_tokens.extend(temp_tokens2)
  780.  
  781.             if last_token.name == ';':
  782.                 # Handle data, this isn't a method.
  783.                 parts = self.converter.DeclarationToParts(temp_tokens, True)
  784.                 (name, type_name, templated_types, modifiers, default,
  785.                  unused_other_tokens) = parts
  786.  
  787.                 t0 = temp_tokens[0]
  788.                 names = [t.name for t in temp_tokens]
  789.                 if templated_types:
  790.                     start, end = self.converter.GetTemplateIndices(names)
  791.                     names = names[:start] + names[end:]
  792.                 default = ''.join([t.name for t in default])
  793.                 return self._CreateVariable(t0, name, type_name, modifiers,
  794.                                             names, templated_types, default)
  795.             if last_token.name == '{':
  796.                 self._AddBackTokens(temp_tokens[1:])
  797.                 self._AddBackToken(last_token)
  798.                 method_name = temp_tokens[0].name
  799.                 method = getattr(self, 'handle_' + method_name, None)
  800.                 if not method:
  801.                     # Must be declaring a variable.
  802.                     # TODO(nnorwitz): handle the declaration.
  803.                     return None
  804.                 return method()
  805.             return self._GetMethod(temp_tokens, 0, None, False)
  806.         elif token.token_type == tokenize.SYNTAX:
  807.             if token.name == '~' and self.in_class:
  808.                 # Must be a dtor (probably not in method body).
  809.                 token = self._GetNextToken()
  810.                 # self.in_class can contain A::Name, but the dtor will only
  811.                 # be Name.  Make sure to compare against the right value.
  812.                 if (token.token_type == tokenize.NAME and
  813.                     token.name == self.in_class_name_only):
  814.                     return self._GetMethod([token], FUNCTION_DTOR, None, True)
  815.             # TODO(nnorwitz): handle a lot more syntax.
  816.         elif token.token_type == tokenize.PREPROCESSOR:
  817.             # TODO(nnorwitz): handle more preprocessor directives.
  818.             # token starts with a #, so remove it and strip whitespace.
  819.             name = token.name[1:].lstrip()
  820.             if name.startswith('include'):
  821.                 # Remove "include".
  822.                 name = name[7:].strip()
  823.                 assert name
  824.                 # Handle #include \<newline> "header-on-second-line.h".
  825.                 if name.startswith('\\'):
  826.                     name = name[1:].strip()
  827.                 assert name[0] in '<"', token
  828.                 assert name[-1] in '>"', token
  829.                 system = name[0] == '<'
  830.                 filename = name[1:-1]
  831.                 return Include(token.start, token.end, filename, system)
  832.             if name.startswith('define'):
  833.                 # Remove "define".
  834.                 name = name[6:].strip()
  835.                 assert name
  836.                 value = ''
  837.                 for i, c in enumerate(name):
  838.                     if c.isspace():
  839.                         value = name[i:].lstrip()
  840.                         name = name[:i]
  841.                         break
  842.                 return Define(token.start, token.end, name, value)
  843.             if name.startswith('if') and name[2:3].isspace():
  844.                 condition = name[3:].strip()
  845.                 if condition.startswith('0') or condition.startswith('(0)'):
  846.                     self._SkipIf0Blocks()
  847.         return None
  848.  
  849.     def _GetTokensUpTo(self, expected_token_type, expected_token):
  850.         return self._GetVarTokensUpTo(expected_token_type, expected_token)[0]
  851.  
  852.     def _GetVarTokensUpTo(self, expected_token_type, *expected_tokens):
  853.         last_token = self._GetNextToken()
  854.         tokens = []
  855.         while (last_token.token_type != expected_token_type or
  856.                last_token.name not in expected_tokens):
  857.             tokens.append(last_token)
  858.             last_token = self._GetNextToken()
  859.         return tokens, last_token
  860.  
  861.     # TODO(nnorwitz): remove _IgnoreUpTo() it shouldn't be necessary.
  862.     def _IgnoreUpTo(self, token_type, token):
  863.         unused_tokens = self._GetTokensUpTo(token_type, token)
  864.  
  865.     def _SkipIf0Blocks(self):
  866.         count = 1
  867.         while 1:
  868.             token = self._GetNextToken()
  869.             if token.token_type != tokenize.PREPROCESSOR:
  870.                 continue
  871.  
  872.             name = token.name[1:].lstrip()
  873.             if name.startswith('endif'):
  874.                 count -= 1
  875.                 if count == 0:
  876.                     break
  877.             elif name.startswith('if'):
  878.                 count += 1
  879.  
  880.     def _GetMatchingChar(self, open_paren, close_paren, GetNextToken=None):
  881.         if GetNextToken is None:
  882.             GetNextToken = self._GetNextToken
  883.         # Assumes the current token is open_paren and we will consume
  884.         # and return up to the close_paren.
  885.         count = 1
  886.         token = GetNextToken()
  887.         while 1:
  888.             if token.token_type == tokenize.SYNTAX:
  889.                 if token.name == open_paren:
  890.                     count += 1
  891.                 elif token.name == close_paren:
  892.                     count -= 1
  893.                     if count == 0:
  894.                         break
  895.             yield token
  896.             token = GetNextToken()
  897.         yield token
  898.  
  899.     def _GetParameters(self):
  900.         return self._GetMatchingChar('(', ')')
  901.  
  902.     def GetScope(self):
  903.         return self._GetMatchingChar('{', '}')
  904.  
  905.     def _GetNextToken(self):
  906.         if self.token_queue:
  907.             return self.token_queue.pop()
  908.         return next(self.tokens)
  909.  
  910.     def _AddBackToken(self, token):
  911.         if token.whence == tokenize.WHENCE_STREAM:
  912.             token.whence = tokenize.WHENCE_QUEUE
  913.             self.token_queue.insert(0, token)
  914.         else:
  915.             assert token.whence == tokenize.WHENCE_QUEUE, token
  916.             self.token_queue.append(token)
  917.  
  918.     def _AddBackTokens(self, tokens):
  919.         if tokens:
  920.             if tokens[-1].whence == tokenize.WHENCE_STREAM:
  921.                 for token in tokens:
  922.                     token.whence = tokenize.WHENCE_QUEUE
  923.                 self.token_queue[:0] = reversed(tokens)
  924.             else:
  925.                 assert tokens[-1].whence == tokenize.WHENCE_QUEUE, tokens
  926.                 self.token_queue.extend(reversed(tokens))
  927.  
  928.     def GetName(self, seq=None):
  929.         """Returns ([tokens], next_token_info)."""
  930.         GetNextToken = self._GetNextToken
  931.         if seq is not None:
  932.             it = iter(seq)
  933.             GetNextToken = lambda: next(it)
  934.         next_token = GetNextToken()
  935.         tokens = []
  936.         last_token_was_name = False
  937.         while (next_token.token_type == tokenize.NAME or
  938.                (next_token.token_type == tokenize.SYNTAX and
  939.                 next_token.name in ('::', '<'))):
  940.             # Two NAMEs in a row means the identifier should terminate.
  941.             # It's probably some sort of variable declaration.
  942.             if last_token_was_name and next_token.token_type == tokenize.NAME:
  943.                 break
  944.             last_token_was_name = next_token.token_type == tokenize.NAME
  945.             tokens.append(next_token)
  946.             # Handle templated names.
  947.             if next_token.name == '<':
  948.                 tokens.extend(self._GetMatchingChar('<', '>', GetNextToken))
  949.                 last_token_was_name = True
  950.             next_token = GetNextToken()
  951.         return tokens, next_token
  952.  
  953.     def GetMethod(self, modifiers, templated_types):
  954.         return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(')
  955.         assert len(return_type_and_name) >= 1
  956.         return self._GetMethod(return_type_and_name, modifiers, templated_types,
  957.                                False)
  958.  
    def _GetMethod(self, return_type_and_name, modifiers, templated_types,
                   get_paren):
        """Parse the rest of a function/method declaration or definition.

        Args:
          return_type_and_name: list of tokens preceding the parameter
            list.  The last token is taken as the name (with special
            handling when it is '>' or ']'); the remaining tokens become
            the return type.  The list is modified in place.
          modifiers: FUNCTION_* bit flags known so far; more flags are
            OR'ed in as trailing modifiers are recognised.
          templated_types: passed through unchanged to the created node.
          get_paren: when true, the opening '(' (optionally preceded by a
            '<...>' section) has not been consumed yet and is read here.

        Returns:
          A Method when the tokens before the name number more than two and
          end in '::', otherwise a Function.  Declarations that turn out to
          be function-pointer data return the result of _CreateVariable().
        """
        template_portion = None
        if get_paren:
            token = self._GetNextToken()
            assert token.token_type == tokenize.SYNTAX, token
            if token.name == '<':
                # Handle templatized dtors.
                template_portion = [token]
                template_portion.extend(self._GetMatchingChar('<', '>'))
                token = self._GetNextToken()
            assert token.token_type == tokenize.SYNTAX, token
            assert token.name == '(', token

        name = return_type_and_name.pop()
        # Handle templatized ctors.
        if name.name == '>':
            # Find the first '<' (searching from index 1) and split the
            # template section off; the token before it is the real name.
            index = 1
            while return_type_and_name[index].name != '<':
                index += 1
            template_portion = return_type_and_name[index:] + [name]
            del return_type_and_name[index:]
            name = return_type_and_name.pop()
        elif name.name == ']':
            # 'operator' '[' ']' is folded into one synthetic NAME token.
            rt = return_type_and_name
            assert rt[-1].name == '[', return_type_and_name
            assert rt[-2].name == 'operator', return_type_and_name
            name_seq = return_type_and_name[-2:]
            del return_type_and_name[-2:]
            name = tokenize.Token(tokenize.NAME, 'operator[]',
                                  name_seq[0].start, name.end)
            # Get the open paren so _GetParameters() below works.
            unused_open_paren = self._GetNextToken()

        # TODO(nnorwitz): store template_portion.
        return_type = return_type_and_name
        # `indices` is the token whose start/end are used as the node's
        # position: the first return-type token if any, else the name.
        indices = name
        if return_type:
            indices = return_type[0]

        # Force ctor for templatized ctors.
        if name.name == self.in_class and not modifiers:
            modifiers |= FUNCTION_CTOR
        parameters = list(self._GetParameters())
        del parameters[-1]              # Remove trailing ')'.

        # Handling operator() is especially weird.
        if name.name == 'operator' and not parameters:
            # The empty "()" just read was part of the operator's name; the
            # real parameter list follows.
            token = self._GetNextToken()
            assert token.name == '(', token
            parameters = list(self._GetParameters())
            del parameters[-1]          # Remove trailing ')'.

        # Consume trailing modifiers (NAME tokens after the parameters).
        token = self._GetNextToken()
        while token.token_type == tokenize.NAME:
            modifier_token = token
            token = self._GetNextToken()
            if modifier_token.name == 'const':
                modifiers |= FUNCTION_CONST
            elif modifier_token.name == '__attribute__':
                # TODO(nnorwitz): handle more __attribute__ details.
                modifiers |= FUNCTION_ATTRIBUTE
                assert token.name == '(', token
                # Consume everything between the (parens).
                unused_tokens = list(self._GetMatchingChar('(', ')'))
                token = self._GetNextToken()
            elif modifier_token.name == 'throw':
                modifiers |= FUNCTION_THROW
                assert token.name == '(', token
                # Consume everything between the (parens).
                unused_tokens = list(self._GetMatchingChar('(', ')'))
                token = self._GetNextToken()
            elif modifier_token.name == 'override':
                modifiers |= FUNCTION_OVERRIDE
            elif modifier_token.name == modifier_token.name.upper():
                # HACK(nnorwitz):  assume that all upper-case names
                # are some macro we aren't expanding.
                modifiers |= FUNCTION_UNKNOWN_ANNOTATION
            else:
                self.HandleError('unexpected token', modifier_token)

        assert token.token_type == tokenize.SYNTAX, token
        # Handle ctor initializers.
        if token.name == ':':
            # TODO(nnorwitz): anything else to handle for initializer list?
            # Skip the initializer list up to the body or the terminator.
            while token.name != ';' and token.name != '{':
                token = self._GetNextToken()

        # Handle pointer to functions that are really data but look
        # like method declarations.
        if token.name == '(':
            if parameters[0].name == '*':
                # name contains the return type.
                name = parameters.pop()
                # parameters contains the name of the data.
                modifiers = [p.name for p in parameters]
                # Already at the ( to open the parameter list.
                function_parameters = list(self._GetMatchingChar('(', ')'))
                del function_parameters[-1]  # Remove trailing ')'.
                # TODO(nnorwitz): store the function_parameters.
                token = self._GetNextToken()
                assert token.token_type == tokenize.SYNTAX, token
                assert token.name == ';', token
                return self._CreateVariable(indices, name.name, indices.name,
                                            modifiers, '', None)
            # At this point, we got something like:
            #  return_type (type::*name_)(params);
            # This is a data member called name_ that is a function pointer.
            # With this code: void (sq_type::*field_)(string&);
            # We get: name=void return_type=[] parameters=sq_type ... field_
            # TODO(nnorwitz): is return_type always empty?
            # TODO(nnorwitz): this isn't even close to being correct.
            # Just put in something so we don't crash and can move on.
            real_name = parameters[-1]
            modifiers = [p.name for p in self._GetParameters()]
            del modifiers[-1]           # Remove trailing ')'.
            return self._CreateVariable(indices, real_name.name, indices.name,
                                        modifiers, '', None)

        if token.name == '{':
            # A definition: capture the body tokens.
            body = list(self.GetScope())
            del body[-1]                # Remove trailing '}'.
        else:
            # A declaration: body stays None and a ';' must end it.
            body = None
            if token.name == '=':
                token = self._GetNextToken()

                if token.name == 'default' or token.name == 'delete':
                    # Ignore explicitly defaulted and deleted special members
                    # in C++11.
                    token = self._GetNextToken()
                else:
                    # Handle pure-virtual declarations.
                    assert token.token_type == tokenize.CONSTANT, token
                    assert token.name == '0', token
                    modifiers |= FUNCTION_PURE_VIRTUAL
                    token = self._GetNextToken()

            if token.name == '[':
                # TODO(nnorwitz): store tokens and improve parsing.
                # template <typename T, size_t N> char (&ASH(T (&seq)[N]))[N];
                tokens = list(self._GetMatchingChar('[', ']'))
                token = self._GetNextToken()

            assert token.name == ';', (token, return_type_and_name, parameters)

        # Looks like we got a method, not a function.
        if len(return_type) > 2 and return_type[-1].name == '::':
            return_type, in_class = \
                         self._GetReturnTypeAndClassName(return_type)
            return Method(indices.start, indices.end, name.name, in_class,
                          return_type, parameters, modifiers, templated_types,
                          body, self.namespace_stack)
        return Function(indices.start, indices.end, name.name, return_type,
                        parameters, modifiers, templated_types, body,
                        self.namespace_stack)
  1115.  
  1116.     def _GetReturnTypeAndClassName(self, token_seq):
  1117.         # Splitting the return type from the class name in a method
  1118.         # can be tricky.  For example, Return::Type::Is::Hard::To::Find().
  1119.         # Where is the return type and where is the class name?
  1120.         # The heuristic used is to pull the last name as the class name.
  1121.         # This includes all the templated type info.
  1122.         # TODO(nnorwitz): if there is only One name like in the
  1123.         # example above, punt and assume the last bit is the class name.
  1124.  
  1125.         # Ignore a :: prefix, if exists so we can find the first real name.
  1126.         i = 0
  1127.         if token_seq[0].name == '::':
  1128.             i = 1
  1129.         # Ignore a :: suffix, if exists.
  1130.         end = len(token_seq) - 1
  1131.         if token_seq[end-1].name == '::':
  1132.             end -= 1
  1133.  
  1134.         # Make a copy of the sequence so we can append a sentinel
  1135.         # value. This is required for GetName will has to have some
  1136.         # terminating condition beyond the last name.
  1137.         seq_copy = token_seq[i:end]
  1138.         seq_copy.append(tokenize.Token(tokenize.SYNTAX, '', 0, 0))
  1139.         names = []
  1140.         while i < end:
  1141.             # Iterate through the sequence parsing out each name.
  1142.             new_name, next = self.GetName(seq_copy[i:])
  1143.             assert new_name, 'Got empty new_name, next=%s' % next
  1144.             # We got a pointer or ref.  Add it to the name.
  1145.             if next and next.token_type == tokenize.SYNTAX:
  1146.                 new_name.append(next)
  1147.             names.append(new_name)
  1148.             i += len(new_name)
  1149.  
  1150.         # Now that we have the names, it's time to undo what we did.
  1151.  
  1152.         # Remove the sentinel value.
  1153.         names[-1].pop()
  1154.         # Flatten the token sequence for the return type.
  1155.         return_type = [e for seq in names[:-1] for e in seq]
  1156.         # The class name is the last name.
  1157.         class_name = names[-1]
  1158.         return return_type, class_name
  1159.  
  1160.     def handle_bool(self):
  1161.         pass
  1162.  
  1163.     def handle_char(self):
  1164.         pass
  1165.  
  1166.     def handle_int(self):
  1167.         pass
  1168.  
  1169.     def handle_long(self):
  1170.         pass
  1171.  
  1172.     def handle_short(self):
  1173.         pass
  1174.  
  1175.     def handle_double(self):
  1176.         pass
  1177.  
  1178.     def handle_float(self):
  1179.         pass
  1180.  
  1181.     def handle_void(self):
  1182.         pass
  1183.  
  1184.     def handle_wchar_t(self):
  1185.         pass
  1186.  
  1187.     def handle_unsigned(self):
  1188.         pass
  1189.  
  1190.     def handle_signed(self):
  1191.         pass
  1192.  
  1193.     def _GetNestedType(self, ctor):
  1194.         name = None
  1195.         name_tokens, token = self.GetName()
  1196.         if name_tokens:
  1197.             name = ''.join([t.name for t in name_tokens])
  1198.  
  1199.         # Handle forward declarations.
  1200.         if token.token_type == tokenize.SYNTAX and token.name == ';':
  1201.             return ctor(token.start, token.end, name, None,
  1202.                         self.namespace_stack)
  1203.  
  1204.         if token.token_type == tokenize.NAME and self._handling_typedef:
  1205.             self._AddBackToken(token)
  1206.             return ctor(token.start, token.end, name, None,
  1207.                         self.namespace_stack)
  1208.  
  1209.         # Must be the type declaration.
  1210.         fields = list(self._GetMatchingChar('{', '}'))
  1211.         del fields[-1]                  # Remove trailing '}'.
  1212.         if token.token_type == tokenize.SYNTAX and token.name == '{':
  1213.             next = self._GetNextToken()
  1214.             new_type = ctor(token.start, token.end, name, fields,
  1215.                             self.namespace_stack)
  1216.             # A name means this is an anonymous type and the name
  1217.             # is the variable declaration.
  1218.             if next.token_type != tokenize.NAME:
  1219.                 return new_type
  1220.             name = new_type
  1221.             token = next
  1222.  
  1223.         # Must be variable declaration using the type prefixed with keyword.
  1224.         assert token.token_type == tokenize.NAME, token
  1225.         return self._CreateVariable(token, token.name, name, [], '', None)
  1226.  
  1227.     def handle_struct(self):
  1228.         # Special case the handling typedef/aliasing of structs here.
  1229.         # It would be a pain to handle in the class code.
  1230.         name_tokens, var_token = self.GetName()
  1231.         if name_tokens:
  1232.             next_token = self._GetNextToken()
  1233.             is_syntax = (var_token.token_type == tokenize.SYNTAX and
  1234.                          var_token.name[0] in '*&')
  1235.             is_variable = (var_token.token_type == tokenize.NAME and
  1236.                            next_token.name == ';')
  1237.             variable = var_token
  1238.             if is_syntax and not is_variable:
  1239.                 variable = next_token
  1240.                 temp = self._GetNextToken()
  1241.                 if temp.token_type == tokenize.SYNTAX and temp.name == '(':
  1242.                     # Handle methods declared to return a struct.
  1243.                     t0 = name_tokens[0]
  1244.                     struct = tokenize.Token(tokenize.NAME, 'struct',
  1245.                                             t0.start-7, t0.start-2)
  1246.                     type_and_name = [struct]
  1247.                     type_and_name.extend(name_tokens)
  1248.                     type_and_name.extend((var_token, next_token))
  1249.                     return self._GetMethod(type_and_name, 0, None, False)
  1250.                 assert temp.name == ';', (temp, name_tokens, var_token)
  1251.             if is_syntax or (is_variable and not self._handling_typedef):
  1252.                 modifiers = ['struct']
  1253.                 type_name = ''.join([t.name for t in name_tokens])
  1254.                 position = name_tokens[0]
  1255.                 return self._CreateVariable(position, variable.name, type_name,
  1256.                                             modifiers, var_token.name, None)
  1257.             name_tokens.extend((var_token, next_token))
  1258.             self._AddBackTokens(name_tokens)
  1259.         else:
  1260.             self._AddBackToken(var_token)
  1261.         return self._GetClass(Struct, VISIBILITY_PUBLIC, None)
  1262.  
  1263.     def handle_union(self):
  1264.         return self._GetNestedType(Union)
  1265.  
  1266.     def handle_enum(self):
  1267.         token = self._GetNextToken()
  1268.         if not (token.token_type == tokenize.NAME and token.name == 'class'):
  1269.             self._AddBackToken(token)
  1270.         return self._GetNestedType(Enum)
  1271.  
  1272.     def handle_auto(self):
  1273.         # TODO(nnorwitz): warn about using auto?  Probably not since it
  1274.         # will be reclaimed and useful for C++0x.
  1275.         pass
  1276.  
  1277.     def handle_register(self):
  1278.         pass
  1279.  
  1280.     def handle_const(self):
  1281.         pass
  1282.  
  1283.     def handle_inline(self):
  1284.         pass
  1285.  
  1286.     def handle_extern(self):
  1287.         pass
  1288.  
  1289.     def handle_static(self):
  1290.         pass
  1291.  
  1292.     def handle_virtual(self):
  1293.         # What follows must be a method.
  1294.         token = token2 = self._GetNextToken()
  1295.         if token.name == 'inline':
  1296.             # HACK(nnorwitz): handle inline dtors by ignoring 'inline'.
  1297.             token2 = self._GetNextToken()
  1298.         if token2.token_type == tokenize.SYNTAX and token2.name == '~':
  1299.             return self.GetMethod(FUNCTION_VIRTUAL + FUNCTION_DTOR, None)
  1300.         assert token.token_type == tokenize.NAME or token.name == '::', token
  1301.         return_type_and_name = self._GetTokensUpTo(tokenize.SYNTAX, '(')  # )
  1302.         return_type_and_name.insert(0, token)
  1303.         if token2 is not token:
  1304.             return_type_and_name.insert(1, token2)
  1305.         return self._GetMethod(return_type_and_name, FUNCTION_VIRTUAL,
  1306.                                None, False)
  1307.  
  1308.     def handle_volatile(self):
  1309.         pass
  1310.  
  1311.     def handle_mutable(self):
  1312.         pass
  1313.  
  1314.     def handle_public(self):
  1315.         assert self.in_class
  1316.         self.visibility = VISIBILITY_PUBLIC
  1317.  
  1318.     def handle_protected(self):
  1319.         assert self.in_class
  1320.         self.visibility = VISIBILITY_PROTECTED
  1321.  
  1322.     def handle_private(self):
  1323.         assert self.in_class
  1324.         self.visibility = VISIBILITY_PRIVATE
  1325.  
  1326.     def handle_friend(self):
  1327.         tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
  1328.         assert tokens
  1329.         t0 = tokens[0]
  1330.         return Friend(t0.start, t0.end, tokens, self.namespace_stack)
  1331.  
  1332.     def handle_static_cast(self):
  1333.         pass
  1334.  
  1335.     def handle_const_cast(self):
  1336.         pass
  1337.  
  1338.     def handle_dynamic_cast(self):
  1339.         pass
  1340.  
  1341.     def handle_reinterpret_cast(self):
  1342.         pass
  1343.  
  1344.     def handle_new(self):
  1345.         pass
  1346.  
  1347.     def handle_delete(self):
  1348.         tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
  1349.         assert tokens
  1350.         return Delete(tokens[0].start, tokens[0].end, tokens)
  1351.  
  1352.     def handle_typedef(self):
  1353.         token = self._GetNextToken()
  1354.         if (token.token_type == tokenize.NAME and
  1355.             keywords.IsKeyword(token.name)):
  1356.             # Token must be struct/enum/union/class.
  1357.             method = getattr(self, 'handle_' + token.name)
  1358.             self._handling_typedef = True
  1359.             tokens = [method()]
  1360.             self._handling_typedef = False
  1361.         else:
  1362.             tokens = [token]
  1363.  
  1364.         # Get the remainder of the typedef up to the semi-colon.
  1365.         tokens.extend(self._GetTokensUpTo(tokenize.SYNTAX, ';'))
  1366.  
  1367.         # TODO(nnorwitz): clean all this up.
  1368.         assert tokens
  1369.         name = tokens.pop()
  1370.         indices = name
  1371.         if tokens:
  1372.             indices = tokens[0]
  1373.         if not indices:
  1374.             indices = token
  1375.         if name.name == ')':
  1376.             # HACK(nnorwitz): Handle pointers to functions "properly".
  1377.             if (len(tokens) >= 4 and
  1378.                 tokens[1].name == '(' and tokens[2].name == '*'):
  1379.                 tokens.append(name)
  1380.                 name = tokens[3]
  1381.         elif name.name == ']':
  1382.             # HACK(nnorwitz): Handle arrays properly.
  1383.             if len(tokens) >= 2:
  1384.                 tokens.append(name)
  1385.                 name = tokens[1]
  1386.         new_type = tokens
  1387.         if tokens and isinstance(tokens[0], tokenize.Token):
  1388.             new_type = self.converter.ToType(tokens)[0]
  1389.         return Typedef(indices.start, indices.end, name.name,
  1390.                        new_type, self.namespace_stack)
  1391.  
  1392.     def handle_typeid(self):
  1393.         pass  # Not needed yet.
  1394.  
  1395.     def handle_typename(self):
  1396.         pass  # Not needed yet.
  1397.  
  1398.     def _GetTemplatedTypes(self):
  1399.         result = {}
  1400.         tokens = list(self._GetMatchingChar('<', '>'))
  1401.         len_tokens = len(tokens) - 1    # Ignore trailing '>'.
  1402.         i = 0
  1403.         while i < len_tokens:
  1404.             key = tokens[i].name
  1405.             i += 1
  1406.             if keywords.IsKeyword(key) or key == ',':
  1407.                 continue
  1408.             type_name = default = None
  1409.             if i < len_tokens:
  1410.                 i += 1
  1411.                 if tokens[i-1].name == '=':
  1412.                     assert i < len_tokens, '%s %s' % (i, tokens)
  1413.                     default, unused_next_token = self.GetName(tokens[i:])
  1414.                     i += len(default)
  1415.                 else:
  1416.                     if tokens[i-1].name != ',':
  1417.                         # We got something like: Type variable.
  1418.                         # Re-adjust the key (variable) and type_name (Type).
  1419.                         key = tokens[i-1].name
  1420.                         type_name = tokens[i-2]
  1421.  
  1422.             result[key] = (type_name, default)
  1423.         return result
  1424.  
  1425.     def handle_template(self):
  1426.         token = self._GetNextToken()
  1427.         assert token.token_type == tokenize.SYNTAX, token
  1428.         assert token.name == '<', token
  1429.         templated_types = self._GetTemplatedTypes()
  1430.         # TODO(nnorwitz): for now, just ignore the template params.
  1431.         token = self._GetNextToken()
  1432.         if token.token_type == tokenize.NAME:
  1433.             if token.name == 'class':
  1434.                 return self._GetClass(Class, VISIBILITY_PRIVATE, templated_types)
  1435.             elif token.name == 'struct':
  1436.                 return self._GetClass(Struct, VISIBILITY_PUBLIC, templated_types)
  1437.             elif token.name == 'friend':
  1438.                 return self.handle_friend()
  1439.         self._AddBackToken(token)
  1440.         tokens, last = self._GetVarTokensUpTo(tokenize.SYNTAX, '(', ';')
  1441.         tokens.append(last)
  1442.         self._AddBackTokens(tokens)
  1443.         if last.name == '(':
  1444.             return self.GetMethod(FUNCTION_NONE, templated_types)
  1445.         # Must be a variable definition.
  1446.         return None
  1447.  
  1448.     def handle_true(self):
  1449.         pass  # Nothing to do.
  1450.  
  1451.     def handle_false(self):
  1452.         pass  # Nothing to do.
  1453.  
  1454.     def handle_asm(self):
  1455.         pass  # Not needed yet.
  1456.  
  1457.     def handle_class(self):
  1458.         return self._GetClass(Class, VISIBILITY_PRIVATE, None)
  1459.  
  1460.     def _GetBases(self):
  1461.         # Get base classes.
  1462.         bases = []
  1463.         while 1:
  1464.             token = self._GetNextToken()
  1465.             assert token.token_type == tokenize.NAME, token
  1466.             # TODO(nnorwitz): store kind of inheritance...maybe.
  1467.             if token.name not in ('public', 'protected', 'private'):
  1468.                 # If inheritance type is not specified, it is private.
  1469.                 # Just put the token back so we can form a name.
  1470.                 # TODO(nnorwitz): it would be good to warn about this.
  1471.                 self._AddBackToken(token)
  1472.             else:
  1473.                 # Check for virtual inheritance.
  1474.                 token = self._GetNextToken()
  1475.                 if token.name != 'virtual':
  1476.                     self._AddBackToken(token)
  1477.                 else:
  1478.                     # TODO(nnorwitz): store that we got virtual for this base.
  1479.                     pass
  1480.             base, next_token = self.GetName()
  1481.             bases_ast = self.converter.ToType(base)
  1482.             assert len(bases_ast) == 1, bases_ast
  1483.             bases.append(bases_ast[0])
  1484.             assert next_token.token_type == tokenize.SYNTAX, next_token
  1485.             if next_token.name == '{':
  1486.                 token = next_token
  1487.                 break
  1488.             # Support multiple inheritance.
  1489.             assert next_token.name == ',', next_token
  1490.         return bases, token
  1491.  
  1492.     def _GetClass(self, class_type, visibility, templated_types):
  1493.         class_name = None
  1494.         class_token = self._GetNextToken()
  1495.         if class_token.token_type != tokenize.NAME:
  1496.             assert class_token.token_type == tokenize.SYNTAX, class_token
  1497.             token = class_token
  1498.         else:
  1499.             # Skip any macro (e.g. storage class specifiers) after the
  1500.             # 'class' keyword.
  1501.             next_token = self._GetNextToken()
  1502.             if next_token.token_type == tokenize.NAME:
  1503.                 self._AddBackToken(next_token)
  1504.             else:
  1505.                 self._AddBackTokens([class_token, next_token])
  1506.             name_tokens, token = self.GetName()
  1507.             class_name = ''.join([t.name for t in name_tokens])
  1508.         bases = None
  1509.         if token.token_type == tokenize.SYNTAX:
  1510.             if token.name == ';':
  1511.                 # Forward declaration.
  1512.                 return class_type(class_token.start, class_token.end,
  1513.                                   class_name, None, templated_types, None,
  1514.                                   self.namespace_stack)
  1515.             if token.name in '*&':
  1516.                 # Inline forward declaration.  Could be method or data.
  1517.                 name_token = self._GetNextToken()
  1518.                 next_token = self._GetNextToken()
  1519.                 if next_token.name == ';':
  1520.                     # Handle data
  1521.                     modifiers = ['class']
  1522.                     return self._CreateVariable(class_token, name_token.name,
  1523.                                                 class_name,
  1524.                                                 modifiers, token.name, None)
  1525.                 else:
  1526.                     # Assume this is a method.
  1527.                     tokens = (class_token, token, name_token, next_token)
  1528.                     self._AddBackTokens(tokens)
  1529.                     return self.GetMethod(FUNCTION_NONE, None)
  1530.             if token.name == ':':
  1531.                 bases, token = self._GetBases()
  1532.  
  1533.         body = None
  1534.         if token.token_type == tokenize.SYNTAX and token.name == '{':
  1535.             assert token.token_type == tokenize.SYNTAX, token
  1536.             assert token.name == '{', token
  1537.  
  1538.             ast = AstBuilder(self.GetScope(), self.filename, class_name,
  1539.                              visibility, self.namespace_stack)
  1540.             body = list(ast.Generate())
  1541.  
  1542.             if not self._handling_typedef:
  1543.                 token = self._GetNextToken()
  1544.                 if token.token_type != tokenize.NAME:
  1545.                     assert token.token_type == tokenize.SYNTAX, token
  1546.                     assert token.name == ';', token
  1547.                 else:
  1548.                     new_class = class_type(class_token.start, class_token.end,
  1549.                                            class_name, bases, None,
  1550.                                            body, self.namespace_stack)
  1551.  
  1552.                     modifiers = []
  1553.                     return self._CreateVariable(class_token,
  1554.                                                 token.name, new_class,
  1555.                                                 modifiers, token.name, None)
  1556.         else:
  1557.             if not self._handling_typedef:
  1558.                 self.HandleError('non-typedef token', token)
  1559.             self._AddBackToken(token)
  1560.  
  1561.         return class_type(class_token.start, class_token.end, class_name,
  1562.                           bases, templated_types, body, self.namespace_stack)
  1563.  
  1564.     def handle_namespace(self):
  1565.         token = self._GetNextToken()
  1566.         # Support anonymous namespaces.
  1567.         name = None
  1568.         if token.token_type == tokenize.NAME:
  1569.             name = token.name
  1570.             token = self._GetNextToken()
  1571.         self.namespace_stack.append(name)
  1572.         assert token.token_type == tokenize.SYNTAX, token
  1573.         # Create an internal token that denotes when the namespace is complete.
  1574.         internal_token = tokenize.Token(_INTERNAL_TOKEN, _NAMESPACE_POP,
  1575.                                         None, None)
  1576.         internal_token.whence = token.whence
  1577.         if token.name == '=':
  1578.             # TODO(nnorwitz): handle aliasing namespaces.
  1579.             name, next_token = self.GetName()
  1580.             assert next_token.name == ';', next_token
  1581.             self._AddBackToken(internal_token)
  1582.         else:
  1583.             assert token.name == '{', token
  1584.             tokens = list(self.GetScope())
  1585.             # Replace the trailing } with the internal namespace pop token.
  1586.             tokens[-1] = internal_token
  1587.             # Handle namespace with nothing in it.
  1588.             self._AddBackTokens(tokens)
  1589.         return None
  1590.  
  1591.     def handle_using(self):
  1592.         tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
  1593.         assert tokens
  1594.         return Using(tokens[0].start, tokens[0].end, tokens)
  1595.  
  1596.     def handle_explicit(self):
  1597.         assert self.in_class
  1598.         # Nothing much to do.
  1599.         # TODO(nnorwitz): maybe verify the method name == class name.
  1600.         # This must be a ctor.
  1601.         return self.GetMethod(FUNCTION_CTOR, None)
  1602.  
  1603.     def handle_this(self):
  1604.         pass  # Nothing to do.
  1605.  
  1606.     def handle_operator(self):
  1607.         # Pull off the next token(s?) and make that part of the method name.
  1608.         pass
  1609.  
  1610.     def handle_sizeof(self):
  1611.         pass
  1612.  
  1613.     def handle_case(self):
  1614.         pass
  1615.  
  1616.     def handle_switch(self):
  1617.         pass
  1618.  
  1619.     def handle_default(self):
  1620.         token = self._GetNextToken()
  1621.         assert token.token_type == tokenize.SYNTAX
  1622.         assert token.name == ':'
  1623.  
  1624.     def handle_if(self):
  1625.         pass
  1626.  
  1627.     def handle_else(self):
  1628.         pass
  1629.  
  1630.     def handle_return(self):
  1631.         tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
  1632.         if not tokens:
  1633.             return Return(self.current_token.start, self.current_token.end, None)
  1634.         return Return(tokens[0].start, tokens[0].end, tokens)
  1635.  
  1636.     def handle_goto(self):
  1637.         tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
  1638.         assert len(tokens) == 1, str(tokens)
  1639.         return Goto(tokens[0].start, tokens[0].end, tokens[0].name)
  1640.  
  1641.     def handle_try(self):
  1642.         pass  # Not needed yet.
  1643.  
  1644.     def handle_catch(self):
  1645.         pass  # Not needed yet.
  1646.  
  1647.     def handle_throw(self):
  1648.         pass  # Not needed yet.
  1649.  
  1650.     def handle_while(self):
  1651.         pass
  1652.  
  1653.     def handle_do(self):
  1654.         pass
  1655.  
  1656.     def handle_for(self):
  1657.         pass
  1658.  
  1659.     def handle_break(self):
  1660.         self._IgnoreUpTo(tokenize.SYNTAX, ';')
  1661.  
  1662.     def handle_continue(self):
  1663.         self._IgnoreUpTo(tokenize.SYNTAX, ';')
  1664.  
  1665.  
  1666. def BuilderFromSource(source, filename):
  1667.     """Utility method that returns an AstBuilder from source code.
  1668.  
  1669.    Args:
  1670.      source: 'C++ source code'
  1671.      filename: 'file1'
  1672.  
  1673.    Returns:
  1674.      AstBuilder
  1675.    """
  1676.     return AstBuilder(tokenize.GetTokens(source), filename)
  1677.  
  1678.  
  1679. def PrintIndentifiers(filename, should_print):
  1680.     """Prints all identifiers for a C++ source file.
  1681.  
  1682.    Args:
  1683.      filename: 'file1'
  1684.      should_print: predicate with signature: bool Function(token)
  1685.    """
  1686.     source = utils.ReadFile(filename, False)
  1687.     if source is None:
  1688.         sys.stderr.write('Unable to find: %s\n' % filename)
  1689.         return
  1690.  
  1691.     #print('Processing %s' % actual_filename)
  1692.     builder = BuilderFromSource(source, filename)
  1693.     try:
  1694.         for node in builder.Generate():
  1695.             if should_print(node):
  1696.                 print(node.name)
  1697.     except KeyboardInterrupt:
  1698.         return
  1699.     except:
  1700.         pass
  1701.  
  1702.  
  1703. def PrintAllIndentifiers(filenames, should_print):
  1704.     """Prints all identifiers for each C++ source file in filenames.
  1705.  
  1706.    Args:
  1707.      filenames: ['file1', 'file2', ...]
  1708.      should_print: predicate with signature: bool Function(token)
  1709.    """
  1710.     for path in filenames:
  1711.         PrintIndentifiers(path, should_print)
  1712.  
  1713.  
  1714. def main(argv):
  1715.     for filename in argv[1:]:
  1716.         source = utils.ReadFile(filename)
  1717.         if source is None:
  1718.             continue
  1719.  
  1720.         print('Processing %s' % filename)
  1721.         builder = BuilderFromSource(source, filename)
  1722.         try:
  1723.             entire_ast = filter(None, builder.Generate())
  1724.         except KeyboardInterrupt:
  1725.             return
  1726.         except:
  1727.             # Already printed a warning, print the traceback and continue.
  1728.             traceback.print_exc()
  1729.         else:
  1730.             if utils.DEBUG:
  1731.                 for ast in entire_ast:
  1732.                     print(ast)
  1733.  
  1734.  
# Script entry point: run main() on the command-line arguments only when
# this file is executed directly, not when it is imported.
if __name__ == '__main__':
    main(sys.argv)

Raw Paste


Login or Register to edit or fork this paste. It's free.