| """ | |
| Parse Python code and perform AST validation. | |
| """ | |
| import ast | |
| import sys | |
| from typing import Final, Iterable, Iterator, List, Set, Tuple | |
| from black.mode import VERSION_TO_FEATURES, Feature, TargetVersion, supports_feature | |
| from black.nodes import syms | |
| from blib2to3 import pygram | |
| from blib2to3.pgen2 import driver | |
| from blib2to3.pgen2.grammar import Grammar | |
| from blib2to3.pgen2.parse import ParseError | |
| from blib2to3.pgen2.tokenize import TokenError | |
| from blib2to3.pytree import Leaf, Node | |
| PY2_HINT: Final = "Python 2 support was removed in version 22.0." | |
| class InvalidInput(ValueError): | |
| """Raised when input source code fails all parse attempts.""" | |
| def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]: | |
| if not target_versions: | |
| # No target_version specified, so try all grammars. | |
| return [ | |
| # Python 3.7-3.9 | |
| pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords, | |
| # Python 3.0-3.6 | |
| pygram.python_grammar_no_print_statement_no_exec_statement, | |
| # Python 3.10+ | |
| pygram.python_grammar_soft_keywords, | |
| ] | |
| grammars = [] | |
| # If we have to parse both, try to parse async as a keyword first | |
| if not supports_feature( | |
| target_versions, Feature.ASYNC_IDENTIFIERS | |
| ) and not supports_feature(target_versions, Feature.PATTERN_MATCHING): | |
| # Python 3.7-3.9 | |
| grammars.append( | |
| pygram.python_grammar_no_print_statement_no_exec_statement_async_keywords | |
| ) | |
| if not supports_feature(target_versions, Feature.ASYNC_KEYWORDS): | |
| # Python 3.0-3.6 | |
| grammars.append(pygram.python_grammar_no_print_statement_no_exec_statement) | |
| if any(Feature.PATTERN_MATCHING in VERSION_TO_FEATURES[v] for v in target_versions): | |
| # Python 3.10+ | |
| grammars.append(pygram.python_grammar_soft_keywords) | |
| # At least one of the above branches must have been taken, because every Python | |
| # version has exactly one of the two 'ASYNC_*' flags | |
| return grammars | |
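# Illustrative sketch (not part of the original module): the grammar list narrows as
# target versions are pinned down. Assuming the usual TargetVersion members, roughly:
#
#     get_grammars(set())                   # no targets: all three grammars, in order
#     get_grammars({TargetVersion.PY310})   # only the soft-keywords (3.10+) grammar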
def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -> Node:
    """Given a string with source, return the lib2to3 Node."""
    if not src_txt.endswith("\n"):
        src_txt += "\n"

    grammars = get_grammars(set(target_versions))
    errors = {}
    for grammar in grammars:
        drv = driver.Driver(grammar)
        try:
            result = drv.parse_string(src_txt, True)
            break

        except ParseError as pe:
            lineno, column = pe.context[1]
            lines = src_txt.splitlines()
            try:
                faulty_line = lines[lineno - 1]
            except IndexError:
                faulty_line = "<line number missing in source>"
            errors[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {faulty_line}"
            )

        except TokenError as te:
            # In edge cases these are raised; and typically don't have a "faulty_line".
            lineno, column = te.args[1]
            errors[grammar.version] = InvalidInput(
                f"Cannot parse: {lineno}:{column}: {te.args[0]}"
            )

    else:
        # Choose the latest version when raising the actual parsing error.
        assert len(errors) >= 1
        exc = errors[max(errors)]

        if matches_grammar(src_txt, pygram.python_grammar) or matches_grammar(
            src_txt, pygram.python_grammar_no_print_statement
        ):
            original_msg = exc.args[0]
            msg = f"{original_msg}\n{PY2_HINT}"
            raise InvalidInput(msg) from None

        raise exc from None

    if isinstance(result, Leaf):
        result = Node(syms.file_input, [result])
    return result
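# Illustrative sketch (not in the original source): Python 2-only syntax fails every
# grammar above; if it still parses under the legacy print-statement grammar, the
# raised InvalidInput message gets PY2_HINT appended, roughly:
#
#     lib2to3_parse("print 'hello'\n")   # raises InvalidInput ending with PY2_HINT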
def matches_grammar(src_txt: str, grammar: Grammar) -> bool:
    drv = driver.Driver(grammar)
    try:
        drv.parse_string(src_txt, True)
    except (ParseError, TokenError, IndentationError):
        return False
    else:
        return True


def lib2to3_unparse(node: Node) -> str:
    """Given a lib2to3 node, return its string representation."""
    code = str(node)
    return code
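# Illustrative round-trip sketch (not in the original source): unparsing is simply
# str() on the lossless lib2to3 tree, so parse followed by unparse should reproduce
# the input verbatim:
#
#     node = lib2to3_parse("x = 1\n")
#     assert lib2to3_unparse(node) == "x = 1\n"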
def parse_single_version(
    src: str, version: Tuple[int, int], *, type_comments: bool
) -> ast.AST:
    filename = "<unknown>"
    return ast.parse(
        src, filename, feature_version=version, type_comments=type_comments
    )


def parse_ast(src: str) -> ast.AST:
    # TODO: support Python 4+ ;)
    versions = [(3, minor) for minor in range(3, sys.version_info[1] + 1)]

    first_error = ""
    for version in sorted(versions, reverse=True):
        try:
            return parse_single_version(src, version, type_comments=True)
        except SyntaxError as e:
            if not first_error:
                first_error = str(e)

    # Try to parse without type comments
    for version in sorted(versions, reverse=True):
        try:
            return parse_single_version(src, version, type_comments=False)
        except SyntaxError:
            pass

    raise SyntaxError(first_error)
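# Illustrative sketch (not in the original source): parse_ast() prefers the newest
# feature_version and keeps type comments when it can, then falls back to a plain
# parse; the first (newest-version) SyntaxError is what gets re-raised on failure:
#
#     parse_ast("x = []  # type: List[int]\n")   # ast.Module, type_comment preserved
#     parse_ast("def f(:\n")                      # raises SyntaxError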
def _normalize(lineend: str, value: str) -> str:
    # To normalize, we strip any leading and trailing space from
    # each line...
    stripped: List[str] = [i.strip() for i in value.splitlines()]
    normalized = lineend.join(stripped)
    # ...and remove any blank lines at the beginning and end of
    # the whole string
    return normalized.strip()
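# Illustrative example (not in the original source) of what _normalize() does to an
# indented, docstring-style value:
#
#     _normalize("\n", "  Summary line.  \n\n      Details.\n")
#     # -> "Summary line.\n\nDetails."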
def stringify_ast(node: ast.AST, depth: int = 0) -> Iterator[str]:
    """Simple visitor generating strings to compare ASTs by content."""
    if (
        isinstance(node, ast.Constant)
        and isinstance(node.value, str)
        and node.kind == "u"
    ):
        # It's a quirk of history that we strip the u prefix over here. We used to
        # rewrite the AST nodes for Python version compatibility and we never copied
        # over the kind
        node.kind = None

    yield f"{' ' * depth}{node.__class__.__name__}("

    for field in sorted(node._fields):  # noqa: F402
        # TypeIgnore has only one field 'lineno' which breaks this comparison
        if isinstance(node, ast.TypeIgnore):
            break

        try:
            value: object = getattr(node, field)
        except AttributeError:
            continue

        yield f"{' ' * (depth+1)}{field}="

        if isinstance(value, list):
            for item in value:
                # Ignore nested tuples within del statements, because we may insert
                # parentheses and they change the AST.
                if (
                    field == "targets"
                    and isinstance(node, ast.Delete)
                    and isinstance(item, ast.Tuple)
                ):
                    for elt in item.elts:
                        yield from stringify_ast(elt, depth + 2)

                elif isinstance(item, ast.AST):
                    yield from stringify_ast(item, depth + 2)

        elif isinstance(value, ast.AST):
            yield from stringify_ast(value, depth + 2)

        else:
            normalized: object
            if (
                isinstance(node, ast.Constant)
                and field == "value"
                and isinstance(value, str)
            ):
                # Constant strings may be indented across newlines, if they are
                # docstrings; fold spaces after newlines when comparing. Similarly,
                # trailing and leading space may be removed.
                normalized = _normalize("\n", value)
            elif field == "type_comment" and isinstance(value, str):
                # Trailing whitespace in type comments is removed.
                normalized = value.rstrip()
            else:
                normalized = value
            yield f"{' ' * (depth+2)}{normalized!r}, # {value.__class__.__name__}"

    yield f"{' ' * depth}) # /{node.__class__.__name__}"