sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
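
# Illustrative sketch (not part of the original module): build_mod wraps binary
# operands in Paren so operator precedence survives a round trip. Assuming the
# argument list produced for MOD(a + 1, 7):
#
#   build_mod([exp.column("a") + exp.Literal.number(1), exp.Literal.number(7)])
#
# the result is Mod(this=Paren(this=Add(...)), expression=Literal(7)), which
# renders back as (a + 1) % 7 rather than the incorrectly-bound a + 1 % 7.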


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
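
    # A minimal usage sketch (illustrative; in practice parsers are usually
    # obtained through a Dialect, e.g. sqlglot.parse_one("SELECT a FROM b")):
    #
    #   tokens = Tokenizer().tokenize("SELECT a FROM b")
    #   expressions = Parser(error_level=ErrorLevel.RAISE).parse(tokens, "SELECT a FROM b")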

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "MOD": build_mod,
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }
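
    # Illustrative note: FUNCTIONS maps uppercased SQL function names to builders
    # that receive the parsed argument list (and, for dialect-sensitive entries
    # such as "LOG", the dialect). For example, parsing LOG(2, 8) dispatches to
    # build_logarithm, which honors dialect.LOG_BASE_FIRST when deciding which
    # argument is the base.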

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}
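
    # These tiers feed a conventional precedence-climbing scheme (a sketch of
    # the private _parse_* chain, stated here as an orientation aid rather than
    # a guarantee): roughly assignment -> disjunction -> conjunction ->
    # equality -> comparison -> bitwise -> term -> factor -> exponent -> unary,
    # so "a + b * c" parses as Add(a, Mul(b, c)).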

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
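
    # Illustrative examples of the column operators above: "col -> '$.x'" parses
    # into JSONExtract(this=col, expression=<json path>), "col ->> '$.x'" into
    # JSONExtractScalar, and "col::INT" into Cast (or TryCast when STRICT_CAST
    # is disabled).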

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }
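
    # Dispatch sketch: _parse_statement (defined below) consumes one token; if
    # its TokenType has an entry in STATEMENT_PARSERS the matching lambda runs,
    # otherwise the parser falls back to dialect commands or to parsing a plain
    # expression/SELECT.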

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
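
    # Illustrative examples: RANGE_PARSERS extend an already-parsed left-hand
    # side, so "x BETWEEN 1 AND 2" yields Between(this=x, low=1, high=2) and
    # "x IN (1, 2)" yields In(this=x, expressions=[...]).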

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
KEY": lambda self: self._parse_foreign_key(), 941 "FORMAT": lambda self: self.expression( 942 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 943 ), 944 "GENERATED": lambda self: self._parse_generated_as_identity(), 945 "IDENTITY": lambda self: self._parse_auto_increment(), 946 "INLINE": lambda self: self._parse_inline(), 947 "LIKE": lambda self: self._parse_create_like(), 948 "NOT": lambda self: self._parse_not_constraint(), 949 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 950 "ON": lambda self: ( 951 self._match(TokenType.UPDATE) 952 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 953 ) 954 or self.expression(exp.OnProperty, this=self._parse_id_var()), 955 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 956 "PERIOD": lambda self: self._parse_period_for_system_time(), 957 "PRIMARY KEY": lambda self: self._parse_primary_key(), 958 "REFERENCES": lambda self: self._parse_references(match=False), 959 "TITLE": lambda self: self.expression( 960 exp.TitleColumnConstraint, this=self._parse_var_or_string() 961 ), 962 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 963 "UNIQUE": lambda self: self._parse_unique(), 964 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 965 "WITH": lambda self: self.expression( 966 exp.Properties, expressions=self._parse_wrapped_properties() 967 ), 968 } 969 970 ALTER_PARSERS = { 971 "ADD": lambda self: self._parse_alter_table_add(), 972 "ALTER": lambda self: self._parse_alter_table_alter(), 973 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 974 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 975 "DROP": lambda self: self._parse_alter_table_drop(), 976 "RENAME": lambda self: self._parse_alter_table_rename(), 977 "SET": lambda self: self._parse_alter_table_set(), 978 } 979 980 ALTER_ALTER_PARSERS = { 981 "DISTKEY": lambda self: self._parse_alter_diststyle(), 982 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 983 "SORTKEY": lambda self: self._parse_alter_sortkey(), 984 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 985 } 986 987 SCHEMA_UNNAMED_CONSTRAINTS = { 988 "CHECK", 989 "EXCLUDE", 990 "FOREIGN KEY", 991 "LIKE", 992 "PERIOD", 993 "PRIMARY KEY", 994 "UNIQUE", 995 } 996 997 NO_PAREN_FUNCTION_PARSERS = { 998 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 999 "CASE": lambda self: self._parse_case(), 1000 "CONNECT_BY_ROOT": lambda self: self.expression( 1001 exp.ConnectByRoot, this=self._parse_column() 1002 ), 1003 "IF": lambda self: self._parse_if(), 1004 "NEXT": lambda self: self._parse_next_value_for(), 1005 } 1006 1007 INVALID_FUNC_NAME_TOKENS = { 1008 TokenType.IDENTIFIER, 1009 TokenType.STRING, 1010 } 1011 1012 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1013 1014 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1015 1016 FUNCTION_PARSERS = { 1017 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1018 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1019 "DECODE": lambda self: self._parse_decode(), 1020 "EXTRACT": lambda self: self._parse_extract(), 1021 "GAP_FILL": lambda self: self._parse_gap_fill(), 1022 "JSON_OBJECT": lambda self: self._parse_json_object(), 1023 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1024 "JSON_TABLE": lambda self: self._parse_json_table(), 1025 "MATCH": lambda self: 

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")
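
    # OPTIONS_TYPE dicts such as TRANSACTION_CHARACTERISTICS and CONFLICT_ACTIONS
    # map a leading keyword to the token sequences that may follow it; they are
    # consumed with _parse_var_from_options, as in the USE handler in
    # STATEMENT_PARSERS above.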

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. SELECT COUNT(*) 'count'
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a JSON document
    COLON_IS_JSON_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
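
    # Illustrative call (assuming the default dialect):
    #
    #   parser = Parser()
    #   sql = "SELECT 1; SELECT 2"
    #   trees = parser.parse(parser.dialect.tokenizer.tokenize(sql), sql)
    #
    # yields one syntax tree per semicolon-separated statement.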
1302 """ 1303 errors = [] 1304 for expression_type in ensure_list(expression_types): 1305 parser = self.EXPRESSION_PARSERS.get(expression_type) 1306 if not parser: 1307 raise TypeError(f"No parser registered for {expression_type}") 1308 1309 try: 1310 return self._parse(parser, raw_tokens, sql) 1311 except ParseError as e: 1312 e.errors[0]["into_expression"] = expression_type 1313 errors.append(e) 1314 1315 raise ParseError( 1316 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1317 errors=merge_errors(errors), 1318 ) from errors[-1] 1319 1320 def _parse( 1321 self, 1322 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1323 raw_tokens: t.List[Token], 1324 sql: t.Optional[str] = None, 1325 ) -> t.List[t.Optional[exp.Expression]]: 1326 self.reset() 1327 self.sql = sql or "" 1328 1329 total = len(raw_tokens) 1330 chunks: t.List[t.List[Token]] = [[]] 1331 1332 for i, token in enumerate(raw_tokens): 1333 if token.token_type == TokenType.SEMICOLON: 1334 if token.comments: 1335 chunks.append([token]) 1336 1337 if i < total - 1: 1338 chunks.append([]) 1339 else: 1340 chunks[-1].append(token) 1341 1342 expressions = [] 1343 1344 for tokens in chunks: 1345 self._index = -1 1346 self._tokens = tokens 1347 self._advance() 1348 1349 expressions.append(parse_method(self)) 1350 1351 if self._index < len(self._tokens): 1352 self.raise_error("Invalid expression / Unexpected token") 1353 1354 self.check_errors() 1355 1356 return expressions 1357 1358 def check_errors(self) -> None: 1359 """Logs or raises any found errors, depending on the chosen error level setting.""" 1360 if self.error_level == ErrorLevel.WARN: 1361 for error in self.errors: 1362 logger.error(str(error)) 1363 elif self.error_level == ErrorLevel.RAISE and self.errors: 1364 raise ParseError( 1365 concat_messages(self.errors, self.max_errors), 1366 errors=merge_errors(self.errors), 1367 ) 1368 1369 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1370 """ 1371 Appends an error in the list of recorded errors or raises it, depending on the chosen 1372 error level setting. 1373 """ 1374 token = token or self._curr or self._prev or Token.string("") 1375 start = token.start 1376 end = token.end + 1 1377 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1378 highlight = self.sql[start:end] 1379 end_context = self.sql[end : end + self.error_message_context] 1380 1381 error = ParseError.new( 1382 f"{message}. Line {token.line}, Col: {token.col}.\n" 1383 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1384 description=message, 1385 line=token.line, 1386 col=token.col, 1387 start_context=start_context, 1388 highlight=highlight, 1389 end_context=end_context, 1390 ) 1391 1392 if self.error_level == ErrorLevel.IMMEDIATE: 1393 raise error 1394 1395 self.errors.append(error) 1396 1397 def expression( 1398 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1399 ) -> E: 1400 """ 1401 Creates a new, validated Expression. 1402 1403 Args: 1404 exp_class: The expression class to instantiate. 1405 comments: An optional list of comments to attach to the expression. 1406 kwargs: The arguments to set for the expression along with their respective values. 1407 1408 Returns: 1409 The target expression. 
1410 """ 1411 instance = exp_class(**kwargs) 1412 instance.add_comments(comments) if comments else self._add_comments(instance) 1413 return self.validate_expression(instance) 1414 1415 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1416 if expression and self._prev_comments: 1417 expression.add_comments(self._prev_comments) 1418 self._prev_comments = None 1419 1420 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1421 """ 1422 Validates an Expression, making sure that all its mandatory arguments are set. 1423 1424 Args: 1425 expression: The expression to validate. 1426 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1427 1428 Returns: 1429 The validated expression. 1430 """ 1431 if self.error_level != ErrorLevel.IGNORE: 1432 for error_message in expression.error_messages(args): 1433 self.raise_error(error_message) 1434 1435 return expression 1436 1437 def _find_sql(self, start: Token, end: Token) -> str: 1438 return self.sql[start.start : end.end + 1] 1439 1440 def _is_connected(self) -> bool: 1441 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1442 1443 def _advance(self, times: int = 1) -> None: 1444 self._index += times 1445 self._curr = seq_get(self._tokens, self._index) 1446 self._next = seq_get(self._tokens, self._index + 1) 1447 1448 if self._index > 0: 1449 self._prev = self._tokens[self._index - 1] 1450 self._prev_comments = self._prev.comments 1451 else: 1452 self._prev = None 1453 self._prev_comments = None 1454 1455 def _retreat(self, index: int) -> None: 1456 if index != self._index: 1457 self._advance(index - self._index) 1458 1459 def _warn_unsupported(self) -> None: 1460 if len(self._tokens) <= 1: 1461 return 1462 1463 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1464 # interested in emitting a warning for the one being currently processed. 1465 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1466 1467 logger.warning( 1468 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1469 ) 1470 1471 def _parse_command(self) -> exp.Command: 1472 self._warn_unsupported() 1473 return self.expression( 1474 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1475 ) 1476 1477 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1478 """ 1479 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an
        error. This behavior can be different depending on the user-set ErrorLevel, so _try_parse
        aims to solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this
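
    # Illustrative use: speculative parsing that must not leave the parser
    # mid-stream on failure:
    #
    #   parsed = self._try_parse(self._parse_types)
    #
    # restores the token index and returns None if a ParseError is raised.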

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1692 expression = self._parse_string() 1693 extend_props(self._parse_properties()) 1694 else: 1695 expression = self._parse_statement() 1696 1697 end = self._match_text_seq("END") 1698 1699 if return_: 1700 expression = self.expression(exp.Return, this=expression) 1701 elif create_token.token_type == TokenType.INDEX: 1702 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1703 if not self._match(TokenType.ON): 1704 index = self._parse_id_var() 1705 anonymous = False 1706 else: 1707 index = None 1708 anonymous = True 1709 1710 this = self._parse_index(index=index, anonymous=anonymous) 1711 elif create_token.token_type in self.DB_CREATABLES: 1712 table_parts = self._parse_table_parts( 1713 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1714 ) 1715 1716 # exp.Properties.Location.POST_NAME 1717 self._match(TokenType.COMMA) 1718 extend_props(self._parse_properties(before=True)) 1719 1720 this = self._parse_schema(this=table_parts) 1721 1722 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1723 extend_props(self._parse_properties()) 1724 1725 self._match(TokenType.ALIAS) 1726 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1727 # exp.Properties.Location.POST_ALIAS 1728 extend_props(self._parse_properties()) 1729 1730 if create_token.token_type == TokenType.SEQUENCE: 1731 expression = self._parse_types() 1732 extend_props(self._parse_properties()) 1733 else: 1734 expression = self._parse_ddl_select() 1735 1736 if create_token.token_type == TokenType.TABLE: 1737 # exp.Properties.Location.POST_EXPRESSION 1738 extend_props(self._parse_properties()) 1739 1740 indexes = [] 1741 while True: 1742 index = self._parse_index() 1743 1744 # exp.Properties.Location.POST_INDEX 1745 extend_props(self._parse_properties()) 1746 1747 if not index: 1748 break 1749 else: 1750 self._match(TokenType.COMMA) 1751 indexes.append(index) 1752 elif create_token.token_type == TokenType.VIEW: 1753 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1754 no_schema_binding = True 1755 1756 shallow = self._match_text_seq("SHALLOW") 1757 1758 if self._match_texts(self.CLONE_KEYWORDS): 1759 copy = self._prev.text.lower() == "copy" 1760 clone = self.expression( 1761 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1762 ) 1763 1764 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1765 return self._parse_as_command(start) 1766 1767 return self.expression( 1768 exp.Create, 1769 comments=comments, 1770 this=this, 1771 kind=create_token.text.upper(), 1772 replace=replace, 1773 unique=unique, 1774 expression=expression, 1775 exists=exists, 1776 properties=properties, 1777 indexes=indexes, 1778 no_schema_binding=no_schema_binding, 1779 begin=begin, 1780 end=end, 1781 clone=clone, 1782 ) 1783 1784 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1785 seq = exp.SequenceProperties() 1786 1787 options = [] 1788 index = self._index 1789 1790 while self._curr: 1791 self._match(TokenType.COMMA) 1792 if self._match_text_seq("INCREMENT"): 1793 self._match_text_seq("BY") 1794 self._match_text_seq("=") 1795 seq.set("increment", self._parse_term()) 1796 elif self._match_text_seq("MINVALUE"): 1797 seq.set("minvalue", self._parse_term()) 1798 elif self._match_text_seq("MAXVALUE"): 1799 seq.set("maxvalue", self._parse_term()) 1800 elif self._match(TokenType.START_WITH) or 
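    # Illustration (not part of the original source): a CREATE statement round-trips
    # through the branches above. A minimal sketch:
    #
    #   >>> import sqlglot
    #   >>> ast = sqlglot.parse_one("CREATE OR REPLACE VIEW v AS SELECT 1")
    #   >>> ast.args["replace"], ast.kind
    #   (True, 'VIEW')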
self._match_text_seq("START"): 1801 self._match_text_seq("=") 1802 seq.set("start", self._parse_term()) 1803 elif self._match_text_seq("CACHE"): 1804 # T-SQL allows empty CACHE which is initialized dynamically 1805 seq.set("cache", self._parse_number() or True) 1806 elif self._match_text_seq("OWNED", "BY"): 1807 # "OWNED BY NONE" is the default 1808 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1809 else: 1810 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1811 if opt: 1812 options.append(opt) 1813 else: 1814 break 1815 1816 seq.set("options", options if options else None) 1817 return None if self._index == index else seq 1818 1819 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1820 # only used for teradata currently 1821 self._match(TokenType.COMMA) 1822 1823 kwargs = { 1824 "no": self._match_text_seq("NO"), 1825 "dual": self._match_text_seq("DUAL"), 1826 "before": self._match_text_seq("BEFORE"), 1827 "default": self._match_text_seq("DEFAULT"), 1828 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1829 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1830 "after": self._match_text_seq("AFTER"), 1831 "minimum": self._match_texts(("MIN", "MINIMUM")), 1832 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1833 } 1834 1835 if self._match_texts(self.PROPERTY_PARSERS): 1836 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1837 try: 1838 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1839 except TypeError: 1840 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1841 1842 return None 1843 1844 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1845 return self._parse_wrapped_csv(self._parse_property) 1846 1847 def _parse_property(self) -> t.Optional[exp.Expression]: 1848 if self._match_texts(self.PROPERTY_PARSERS): 1849 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1850 1851 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1852 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1853 1854 if self._match_text_seq("COMPOUND", "SORTKEY"): 1855 return self._parse_sortkey(compound=True) 1856 1857 if self._match_text_seq("SQL", "SECURITY"): 1858 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1859 1860 index = self._index 1861 key = self._parse_column() 1862 1863 if not self._match(TokenType.EQ): 1864 self._retreat(index) 1865 return self._parse_sequence_properties() 1866 1867 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1868 if isinstance(key, exp.Column): 1869 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1870 1871 value = self._parse_bitwise() or self._parse_var(any_token=True) 1872 1873 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1874 if isinstance(value, exp.Column): 1875 value = exp.var(value.name) 1876 1877 return self.expression(exp.Property, this=key, value=value) 1878 1879 def _parse_stored(self) -> exp.FileFormatProperty: 1880 self._match(TokenType.ALIAS) 1881 1882 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1883 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1884 1885 return self.expression( 1886 exp.FileFormatProperty, 1887 this=( 1888 self.expression( 1889 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1890 ) 1891 if 
    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        self._match(TokenType.EQ)
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)
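    # Illustration (not part of the original source): MySQL's DEFINER clause from
    # _parse_definer above, as a hedged sketch:
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one(
    #       "CREATE DEFINER=admin@localhost VIEW v AS SELECT 1", read="mysql"
    #   )
    #   definer = ast.find(sqlglot.exp.DefinerProperty)  # this="admin@localhost"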
    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None
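    # Illustration (not part of the original source): the Postgres partition bound
    # grammar handled by _parse_partitioned_of / _parse_partition_bound_spec above,
    # as a hedged sketch:
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one(
    #       "CREATE TABLE p PARTITION OF t FOR VALUES FROM (1) TO (10)", read="postgres"
    #   )
    #   spec = ast.find(sqlglot.exp.PartitionBoundSpec)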
if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2319 return exp.OnCommitProperty(delete=True) 2320 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2321 2322 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2323 if self._match_text_seq("SQL", "DATA"): 2324 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2325 return None 2326 2327 def _parse_distkey(self) -> exp.DistKeyProperty: 2328 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2329 2330 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2331 table = self._parse_table(schema=True) 2332 2333 options = [] 2334 while self._match_texts(("INCLUDING", "EXCLUDING")): 2335 this = self._prev.text.upper() 2336 2337 id_var = self._parse_id_var() 2338 if not id_var: 2339 return None 2340 2341 options.append( 2342 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2343 ) 2344 2345 return self.expression(exp.LikeProperty, this=table, expressions=options) 2346 2347 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2348 return self.expression( 2349 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2350 ) 2351 2352 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2353 self._match(TokenType.EQ) 2354 return self.expression( 2355 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2356 ) 2357 2358 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2359 self._match_text_seq("WITH", "CONNECTION") 2360 return self.expression( 2361 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2362 ) 2363 2364 def _parse_returns(self) -> exp.ReturnsProperty: 2365 value: t.Optional[exp.Expression] 2366 null = None 2367 is_table = self._match(TokenType.TABLE) 2368 2369 if is_table: 2370 if self._match(TokenType.LT): 2371 value = self.expression( 2372 exp.Schema, 2373 this="TABLE", 2374 expressions=self._parse_csv(self._parse_struct_types), 2375 ) 2376 if not self._match(TokenType.GT): 2377 self.raise_error("Expecting >") 2378 else: 2379 value = self._parse_schema(exp.var("TABLE")) 2380 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2381 null = True 2382 value = None 2383 else: 2384 value = self._parse_types() 2385 2386 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2387 2388 def _parse_describe(self) -> exp.Describe: 2389 kind = self._match_set(self.CREATABLES) and self._prev.text 2390 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2391 if self._match(TokenType.DOT): 2392 style = None 2393 self._retreat(self._index - 2) 2394 this = self._parse_table(schema=True) 2395 properties = self._parse_properties() 2396 expressions = properties.expressions if properties else None 2397 return self.expression( 2398 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2399 ) 2400 2401 def _parse_insert(self) -> exp.Insert: 2402 comments = ensure_list(self._prev_comments) 2403 hint = self._parse_hint() 2404 overwrite = self._match(TokenType.OVERWRITE) 2405 ignore = self._match(TokenType.IGNORE) 2406 local = self._match_text_seq("LOCAL") 2407 alternative = None 2408 is_function = None 2409 2410 if self._match_text_seq("DIRECTORY"): 2411 this: t.Optional[exp.Expression] = self.expression( 2412 exp.Directory, 2413 this=self._parse_var_or_string(), 2414 
    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )
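    # Illustration (not part of the original source): upsert clauses from
    # _parse_on_conflict above, as a hedged sketch:
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one(
    #       "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO NOTHING", read="postgres"
    #   )
    #   conflict = ast.args.get("conflict")  # exp.OnConflict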
    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )
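    # Illustration (not part of the original source): the DML parsers above map
    # onto exp.Delete / exp.Update. A minimal sketch:
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("UPDATE t SET a = 1 WHERE b = 2")
    #   assert isinstance(ast, sqlglot.exp.Update)
    #   assert ast.args["where"] is not None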
    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment)
        )

    def _parse_value(self) -> t.Optional[exp.Tuple]:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match_set(self.DISTINCT_TOKENS)

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = (
                    self._parse_table()
                    if table
                    else self._parse_select(nested=True, parse_set_operation=False)
                )
                this = self._parse_query_modifiers(self._parse_set_operations(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        else:
            this = None

        if parse_set_operation:
            return self._parse_set_operations(this)
        return this

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )
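    # Illustration (not part of the original source): CTEs parsed by _parse_with /
    # _parse_cte above attach to the statement that follows them. A minimal sketch:
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("WITH x AS (SELECT 1 AS c) SELECT c FROM x")
    #   assert ast.args["with"] is not None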
    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_function() or self._parse_var(upper=True)
                ),
                [],
            ):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )
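    # Illustration (not part of the original source): on dialects that set
    # SUPPORTS_IMPLICIT_UNNEST, _parse_query_modifiers above rewrites a comma join
    # against an array column into an explicit UNNEST via
    # _implicit_unnests_to_explicit. A hedged sketch, assuming BigQuery enables it:
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("SELECT x FROM t, t.arr AS x", read="bigquery")
    #   unnest = ast.find(sqlglot.exp.Unnest)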
    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not isinstance(kwargs["this"], exp.Unnest) and not (
            kind and kind.token_type == TokenType.CROSS
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this
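    # Illustration (not part of the original source): join modifiers collected by
    # _parse_join above land on exp.Join args. A minimal sketch:
    #
    #   import sqlglot
    #   join = sqlglot.parse_one("SELECT * FROM a LEFT JOIN b ON a.id = b.id").find(
    #       sqlglot.exp.Join
    #   )
    #   assert join.side == "LEFT" and join.args.get("on") is not None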
    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
            pivots=self._parse_pivots(),
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this
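    # Illustration (not part of the original source): dotted names parsed by
    # _parse_table_parts above become catalog/db/this on exp.Table. A minimal sketch:
    #
    #   import sqlglot
    #   table = sqlglot.parse_one("SELECT * FROM c.d.t").find(sqlglot.exp.Table)
    #   assert (table.catalog, table.db, table.name) == ("c", "d", "t")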
    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not self._match_text_seq("VALUES"):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )
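    # Illustration (not part of the original source): UNNEST in the FROM clause is
    # handled by _parse_unnest above. A hedged sketch using BigQuery, whose dialect
    # sets UNNEST_COLUMN_ONLY:
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("SELECT x FROM UNNEST([1, 2]) AS x", read="bigquery")
    #   unnest = ast.find(sqlglot.exp.Unnest)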
self._match_set((TokenType.PERCENT, TokenType.MOD)): 3536 percent = num 3537 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3538 size = num 3539 else: 3540 percent = num 3541 3542 if matched_l_paren: 3543 self._match_r_paren() 3544 3545 if self._match(TokenType.L_PAREN): 3546 method = self._parse_var(upper=True) 3547 seed = self._match(TokenType.COMMA) and self._parse_number() 3548 self._match_r_paren() 3549 elif self._match_texts(("SEED", "REPEATABLE")): 3550 seed = self._parse_wrapped(self._parse_number) 3551 3552 if not method and self.DEFAULT_SAMPLING_METHOD: 3553 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3554 3555 return self.expression( 3556 exp.TableSample, 3557 expressions=expressions, 3558 method=method, 3559 bucket_numerator=bucket_numerator, 3560 bucket_denominator=bucket_denominator, 3561 bucket_field=bucket_field, 3562 percent=percent, 3563 size=size, 3564 seed=seed, 3565 ) 3566 3567 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3568 return list(iter(self._parse_pivot, None)) or None 3569 3570 def _parse_joins(self) -> t.Iterator[exp.Join]: 3571 return iter(self._parse_join, None) 3572 3573 # https://duckdb.org/docs/sql/statements/pivot 3574 def _parse_simplified_pivot(self) -> exp.Pivot: 3575 def _parse_on() -> t.Optional[exp.Expression]: 3576 this = self._parse_bitwise() 3577 return self._parse_in(this) if self._match(TokenType.IN) else this 3578 3579 this = self._parse_table() 3580 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3581 using = self._match(TokenType.USING) and self._parse_csv( 3582 lambda: self._parse_alias(self._parse_function()) 3583 ) 3584 group = self._parse_group() 3585 return self.expression( 3586 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3587 ) 3588 3589 def _parse_pivot_in(self) -> exp.In: 3590 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3591 this = self._parse_assignment() 3592 3593 self._match(TokenType.ALIAS) 3594 alias = self._parse_field() 3595 if alias: 3596 return self.expression(exp.PivotAlias, this=this, alias=alias) 3597 3598 return this 3599 3600 value = self._parse_column() 3601 3602 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3603 self.raise_error("Expecting IN (") 3604 3605 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3606 3607 self._match_r_paren() 3608 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3609 3610 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3611 index = self._index 3612 include_nulls = None 3613 3614 if self._match(TokenType.PIVOT): 3615 unpivot = False 3616 elif self._match(TokenType.UNPIVOT): 3617 unpivot = True 3618 3619 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3620 if self._match_text_seq("INCLUDE", "NULLS"): 3621 include_nulls = True 3622 elif self._match_text_seq("EXCLUDE", "NULLS"): 3623 include_nulls = False 3624 else: 3625 return None 3626 3627 expressions = [] 3628 3629 if not self._match(TokenType.L_PAREN): 3630 self._retreat(index) 3631 return None 3632 3633 if unpivot: 3634 expressions = self._parse_csv(self._parse_column) 3635 else: 3636 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3637 3638 if not expressions: 3639 self.raise_error("Failed to parse PIVOT's aggregation list") 3640 3641 if not self._match(TokenType.FOR): 3642 self.raise_error("Expecting FOR") 3643 3644 field = self._parse_pivot_in() 3645 3646 self._match_r_paren() 
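# [Editorial note -- illustrative example, not part of the original source. It
# assumes only the public sqlglot API (sqlglot.parse_one) and shows the kind of
# query the surrounding PIVOT machinery consumes; the Snowflake dialect choice
# here is just one of several dialects with PIVOT support.]
#
#   import sqlglot
#   ast = sqlglot.parse_one(
#       "SELECT * FROM t PIVOT (SUM(v) FOR k IN ('a' AS a1, 'b'))",
#       read="snowflake",
#   )
#   pivot = ast.find(sqlglot.exp.Pivot)  # holds the aggregations, FOR column, IN values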
3647 3648 pivot = self.expression( 3649 exp.Pivot, 3650 expressions=expressions, 3651 field=field, 3652 unpivot=unpivot, 3653 include_nulls=include_nulls, 3654 ) 3655 3656 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3657 pivot.set("alias", self._parse_table_alias()) 3658 3659 if not unpivot: 3660 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3661 3662 columns: t.List[exp.Expression] = [] 3663 for fld in pivot.args["field"].expressions: 3664 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3665 for name in names: 3666 if self.PREFIXED_PIVOT_COLUMNS: 3667 name = f"{name}_{field_name}" if name else field_name 3668 else: 3669 name = f"{field_name}_{name}" if name else field_name 3670 3671 columns.append(exp.to_identifier(name)) 3672 3673 pivot.set("columns", columns) 3674 3675 return pivot 3676 3677 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3678 return [agg.alias for agg in aggregations] 3679 3680 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3681 if not skip_where_token and not self._match(TokenType.PREWHERE): 3682 return None 3683 3684 return self.expression( 3685 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3686 ) 3687 3688 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3689 if not skip_where_token and not self._match(TokenType.WHERE): 3690 return None 3691 3692 return self.expression( 3693 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 3694 ) 3695 3696 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3697 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3698 return None 3699 3700 elements: t.Dict[str, t.Any] = defaultdict(list) 3701 3702 if self._match(TokenType.ALL): 3703 elements["all"] = True 3704 elif self._match(TokenType.DISTINCT): 3705 elements["all"] = False 3706 3707 while True: 3708 expressions = self._parse_csv( 3709 lambda: None 3710 if self._match(TokenType.ROLLUP, advance=False) 3711 else self._parse_assignment() 3712 ) 3713 if expressions: 3714 elements["expressions"].extend(expressions) 3715 3716 grouping_sets = self._parse_grouping_sets() 3717 if grouping_sets: 3718 elements["grouping_sets"].extend(grouping_sets) 3719 3720 rollup = None 3721 cube = None 3722 totals = None 3723 3724 index = self._index 3725 with_ = self._match(TokenType.WITH) 3726 if self._match(TokenType.ROLLUP): 3727 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3728 elements["rollup"].extend(ensure_list(rollup)) 3729 3730 if self._match(TokenType.CUBE): 3731 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3732 elements["cube"].extend(ensure_list(cube)) 3733 3734 if self._match_text_seq("TOTALS"): 3735 totals = True 3736 elements["totals"] = True # type: ignore 3737 3738 if not (grouping_sets or rollup or cube or totals): 3739 if with_: 3740 self._retreat(index) 3741 break 3742 3743 return self.expression(exp.Group, **elements) # type: ignore 3744 3745 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3746 if not self._match(TokenType.GROUPING_SETS): 3747 return None 3748 3749 return self._parse_wrapped_csv(self._parse_grouping_set) 3750 3751 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3752 if self._match(TokenType.L_PAREN): 3753 grouping_set = self._parse_csv(self._parse_column) 3754 self._match_r_paren() 3755 return 
self.expression(exp.Tuple, expressions=grouping_set) 3756 3757 return self._parse_column() 3758 3759 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3760 if not skip_having_token and not self._match(TokenType.HAVING): 3761 return None 3762 return self.expression(exp.Having, this=self._parse_assignment()) 3763 3764 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3765 if not self._match(TokenType.QUALIFY): 3766 return None 3767 return self.expression(exp.Qualify, this=self._parse_assignment()) 3768 3769 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3770 if skip_start_token: 3771 start = None 3772 elif self._match(TokenType.START_WITH): 3773 start = self._parse_assignment() 3774 else: 3775 return None 3776 3777 self._match(TokenType.CONNECT_BY) 3778 nocycle = self._match_text_seq("NOCYCLE") 3779 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3780 exp.Prior, this=self._parse_bitwise() 3781 ) 3782 connect = self._parse_assignment() 3783 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3784 3785 if not start and self._match(TokenType.START_WITH): 3786 start = self._parse_assignment() 3787 3788 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3789 3790 def _parse_name_as_expression(self) -> exp.Alias: 3791 return self.expression( 3792 exp.Alias, 3793 alias=self._parse_id_var(any_token=True), 3794 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 3795 ) 3796 3797 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3798 if self._match_text_seq("INTERPOLATE"): 3799 return self._parse_wrapped_csv(self._parse_name_as_expression) 3800 return None 3801 3802 def _parse_order( 3803 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3804 ) -> t.Optional[exp.Expression]: 3805 siblings = None 3806 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3807 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3808 return this 3809 3810 siblings = True 3811 3812 return self.expression( 3813 exp.Order, 3814 this=this, 3815 expressions=self._parse_csv(self._parse_ordered), 3816 interpolate=self._parse_interpolate(), 3817 siblings=siblings, 3818 ) 3819 3820 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3821 if not self._match(token): 3822 return None 3823 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3824 3825 def _parse_ordered( 3826 self, parse_method: t.Optional[t.Callable] = None 3827 ) -> t.Optional[exp.Ordered]: 3828 this = parse_method() if parse_method else self._parse_assignment() 3829 if not this: 3830 return None 3831 3832 asc = self._match(TokenType.ASC) 3833 desc = self._match(TokenType.DESC) or (asc and False) 3834 3835 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3836 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3837 3838 nulls_first = is_nulls_first or False 3839 explicitly_null_ordered = is_nulls_first or is_nulls_last 3840 3841 if ( 3842 not explicitly_null_ordered 3843 and ( 3844 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3845 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3846 ) 3847 and self.dialect.NULL_ORDERING != "nulls_are_last" 3848 ): 3849 nulls_first = True 3850 3851 if self._match_text_seq("WITH", "FILL"): 3852 with_fill = self.expression( 3853 exp.WithFill, 3854 **{ # type: ignore 3855 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3856 "to": 
self._match_text_seq("TO") and self._parse_bitwise(), 3857 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3858 }, 3859 ) 3860 else: 3861 with_fill = None 3862 3863 return self.expression( 3864 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3865 ) 3866 3867 def _parse_limit( 3868 self, 3869 this: t.Optional[exp.Expression] = None, 3870 top: bool = False, 3871 skip_limit_token: bool = False, 3872 ) -> t.Optional[exp.Expression]: 3873 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3874 comments = self._prev_comments 3875 if top: 3876 limit_paren = self._match(TokenType.L_PAREN) 3877 expression = self._parse_term() if limit_paren else self._parse_number() 3878 3879 if limit_paren: 3880 self._match_r_paren() 3881 else: 3882 expression = self._parse_term() 3883 3884 if self._match(TokenType.COMMA): 3885 offset = expression 3886 expression = self._parse_term() 3887 else: 3888 offset = None 3889 3890 limit_exp = self.expression( 3891 exp.Limit, 3892 this=this, 3893 expression=expression, 3894 offset=offset, 3895 comments=comments, 3896 expressions=self._parse_limit_by(), 3897 ) 3898 3899 return limit_exp 3900 3901 if self._match(TokenType.FETCH): 3902 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3903 direction = self._prev.text.upper() if direction else "FIRST" 3904 3905 count = self._parse_field(tokens=self.FETCH_TOKENS) 3906 percent = self._match(TokenType.PERCENT) 3907 3908 self._match_set((TokenType.ROW, TokenType.ROWS)) 3909 3910 only = self._match_text_seq("ONLY") 3911 with_ties = self._match_text_seq("WITH", "TIES") 3912 3913 if only and with_ties: 3914 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3915 3916 return self.expression( 3917 exp.Fetch, 3918 direction=direction, 3919 count=count, 3920 percent=percent, 3921 with_ties=with_ties, 3922 ) 3923 3924 return this 3925 3926 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3927 if not self._match(TokenType.OFFSET): 3928 return this 3929 3930 count = self._parse_term() 3931 self._match_set((TokenType.ROW, TokenType.ROWS)) 3932 3933 return self.expression( 3934 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3935 ) 3936 3937 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3938 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3939 3940 def _parse_locks(self) -> t.List[exp.Lock]: 3941 locks = [] 3942 while True: 3943 if self._match_text_seq("FOR", "UPDATE"): 3944 update = True 3945 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3946 "LOCK", "IN", "SHARE", "MODE" 3947 ): 3948 update = False 3949 else: 3950 break 3951 3952 expressions = None 3953 if self._match_text_seq("OF"): 3954 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3955 3956 wait: t.Optional[bool | exp.Expression] = None 3957 if self._match_text_seq("NOWAIT"): 3958 wait = True 3959 elif self._match_text_seq("WAIT"): 3960 wait = self._parse_primary() 3961 elif self._match_text_seq("SKIP", "LOCKED"): 3962 wait = False 3963 3964 locks.append( 3965 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3966 ) 3967 3968 return locks 3969 3970 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3971 while this and self._match_set(self.SET_OPERATIONS): 3972 token_type = self._prev.token_type 3973 3974 if token_type == TokenType.UNION: 3975 
operation: t.Type[exp.SetOperation] = exp.Union 3976 elif token_type == TokenType.EXCEPT: 3977 operation = exp.Except 3978 else: 3979 operation = exp.Intersect 3980 3981 comments = self._prev.comments 3982 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3983 by_name = self._match_text_seq("BY", "NAME") 3984 expression = self._parse_select(nested=True, parse_set_operation=False) 3985 3986 this = self.expression( 3987 operation, 3988 comments=comments, 3989 this=this, 3990 distinct=distinct, 3991 by_name=by_name, 3992 expression=expression, 3993 ) 3994 3995 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 3996 expression = this.expression 3997 3998 if expression: 3999 for arg in self.SET_OP_MODIFIERS: 4000 expr = expression.args.get(arg) 4001 if expr: 4002 this.set(arg, expr.pop()) 4003 4004 return this 4005 4006 def _parse_expression(self) -> t.Optional[exp.Expression]: 4007 return self._parse_alias(self._parse_assignment()) 4008 4009 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4010 this = self._parse_disjunction() 4011 4012 while self._match_set(self.ASSIGNMENT): 4013 this = self.expression( 4014 self.ASSIGNMENT[self._prev.token_type], 4015 this=this, 4016 comments=self._prev_comments, 4017 expression=self._parse_assignment(), 4018 ) 4019 4020 return this 4021 4022 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4023 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4024 4025 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4026 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4027 4028 def _parse_equality(self) -> t.Optional[exp.Expression]: 4029 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4030 4031 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4032 return self._parse_tokens(self._parse_range, self.COMPARISON) 4033 4034 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4035 this = this or self._parse_bitwise() 4036 negate = self._match(TokenType.NOT) 4037 4038 if self._match_set(self.RANGE_PARSERS): 4039 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4040 if not expression: 4041 return this 4042 4043 this = expression 4044 elif self._match(TokenType.ISNULL): 4045 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4046 4047 # Postgres supports ISNULL and NOTNULL for conditions. 
4048 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4049 if self._match(TokenType.NOTNULL): 4050 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4051 this = self.expression(exp.Not, this=this) 4052 4053 if negate: 4054 this = self.expression(exp.Not, this=this) 4055 4056 if self._match(TokenType.IS): 4057 this = self._parse_is(this) 4058 4059 return this 4060 4061 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4062 index = self._index - 1 4063 negate = self._match(TokenType.NOT) 4064 4065 if self._match_text_seq("DISTINCT", "FROM"): 4066 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4067 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4068 4069 expression = self._parse_null() or self._parse_boolean() 4070 if not expression: 4071 self._retreat(index) 4072 return None 4073 4074 this = self.expression(exp.Is, this=this, expression=expression) 4075 return self.expression(exp.Not, this=this) if negate else this 4076 4077 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4078 unnest = self._parse_unnest(with_alias=False) 4079 if unnest: 4080 this = self.expression(exp.In, this=this, unnest=unnest) 4081 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4082 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4083 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4084 4085 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4086 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4087 else: 4088 this = self.expression(exp.In, this=this, expressions=expressions) 4089 4090 if matched_l_paren: 4091 self._match_r_paren(this) 4092 elif not self._match(TokenType.R_BRACKET, expression=this): 4093 self.raise_error("Expecting ]") 4094 else: 4095 this = self.expression(exp.In, this=this, field=self._parse_field()) 4096 4097 return this 4098 4099 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4100 low = self._parse_bitwise() 4101 self._match(TokenType.AND) 4102 high = self._parse_bitwise() 4103 return self.expression(exp.Between, this=this, low=low, high=high) 4104 4105 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4106 if not self._match(TokenType.ESCAPE): 4107 return this 4108 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4109 4110 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4111 index = self._index 4112 4113 if not self._match(TokenType.INTERVAL) and match_interval: 4114 return None 4115 4116 if self._match(TokenType.STRING, advance=False): 4117 this = self._parse_primary() 4118 else: 4119 this = self._parse_term() 4120 4121 if not this or ( 4122 isinstance(this, exp.Column) 4123 and not this.table 4124 and not this.this.quoted 4125 and this.name.upper() == "IS" 4126 ): 4127 self._retreat(index) 4128 return None 4129 4130 unit = self._parse_function() or ( 4131 not self._match(TokenType.ALIAS, advance=False) 4132 and self._parse_var(any_token=True, upper=True) 4133 ) 4134 4135 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4136 # each INTERVAL expression into this canonical form so it's easy to transpile 4137 if this and this.is_number: 4138 this = exp.Literal.string(this.to_py()) 4139 elif this and this.is_string: 4140 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4141 if 
len(parts) == 1: 4142 if unit: 4143 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4144 self._retreat(self._index - 1) 4145 4146 this = exp.Literal.string(parts[0][0]) 4147 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4148 4149 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4150 unit = self.expression( 4151 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4152 ) 4153 4154 interval = self.expression(exp.Interval, this=this, unit=unit) 4155 4156 index = self._index 4157 self._match(TokenType.PLUS) 4158 4159 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4160 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4161 return self.expression( 4162 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4163 ) 4164 4165 self._retreat(index) 4166 return interval 4167 4168 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4169 this = self._parse_term() 4170 4171 while True: 4172 if self._match_set(self.BITWISE): 4173 this = self.expression( 4174 self.BITWISE[self._prev.token_type], 4175 this=this, 4176 expression=self._parse_term(), 4177 ) 4178 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4179 this = self.expression( 4180 exp.DPipe, 4181 this=this, 4182 expression=self._parse_term(), 4183 safe=not self.dialect.STRICT_STRING_CONCAT, 4184 ) 4185 elif self._match(TokenType.DQMARK): 4186 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4187 elif self._match_pair(TokenType.LT, TokenType.LT): 4188 this = self.expression( 4189 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4190 ) 4191 elif self._match_pair(TokenType.GT, TokenType.GT): 4192 this = self.expression( 4193 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4194 ) 4195 else: 4196 break 4197 4198 return this 4199 4200 def _parse_term(self) -> t.Optional[exp.Expression]: 4201 return self._parse_tokens(self._parse_factor, self.TERM) 4202 4203 def _parse_factor(self) -> t.Optional[exp.Expression]: 4204 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4205 this = parse_method() 4206 4207 while self._match_set(self.FACTOR): 4208 klass = self.FACTOR[self._prev.token_type] 4209 comments = self._prev_comments 4210 expression = parse_method() 4211 4212 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4213 self._retreat(self._index - 1) 4214 return this 4215 4216 this = self.expression(klass, this=this, comments=comments, expression=expression) 4217 4218 if isinstance(this, exp.Div): 4219 this.args["typed"] = self.dialect.TYPED_DIVISION 4220 this.args["safe"] = self.dialect.SAFE_DIVISION 4221 4222 return this 4223 4224 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4225 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4226 4227 def _parse_unary(self) -> t.Optional[exp.Expression]: 4228 if self._match_set(self.UNARY_PARSERS): 4229 return self.UNARY_PARSERS[self._prev.token_type](self) 4230 return self._parse_at_time_zone(self._parse_type()) 4231 4232 def _parse_type( 4233 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4234 ) -> t.Optional[exp.Expression]: 4235 interval = parse_interval and self._parse_interval() 4236 if interval: 4237 return interval 4238 4239 index = self._index 4240 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4241 4242 if data_type: 4243 index2 = 
self._index 4244 this = self._parse_primary() 4245 4246 if isinstance(this, exp.Literal): 4247 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4248 if parser: 4249 return parser(self, this, data_type) 4250 4251 return self.expression(exp.Cast, this=this, to=data_type) 4252 4253 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4254 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4255 # 4256 # If the index difference here is greater than 1, that means the parser itself must have 4257 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4258 # 4259 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4260 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4261 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4262 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4263 # 4264 # In these cases, we don't really want to return the converted type, but instead retreat 4265 # and try to parse a Column or Identifier in the section below. 4266 if data_type.expressions and index2 - index > 1: 4267 self._retreat(index2) 4268 return self._parse_column_ops(data_type) 4269 4270 self._retreat(index) 4271 4272 if fallback_to_identifier: 4273 return self._parse_id_var() 4274 4275 this = self._parse_column() 4276 return this and self._parse_column_ops(this) 4277 4278 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4279 this = self._parse_type() 4280 if not this: 4281 return None 4282 4283 if isinstance(this, exp.Column) and not this.table: 4284 this = exp.var(this.name.upper()) 4285 4286 return self.expression( 4287 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4288 ) 4289 4290 def _parse_types( 4291 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4292 ) -> t.Optional[exp.Expression]: 4293 index = self._index 4294 4295 this: t.Optional[exp.Expression] = None 4296 prefix = self._match_text_seq("SYSUDTLIB", ".") 4297 4298 if not self._match_set(self.TYPE_TOKENS): 4299 identifier = allow_identifiers and self._parse_id_var( 4300 any_token=False, tokens=(TokenType.VAR,) 4301 ) 4302 if isinstance(identifier, exp.Identifier): 4303 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4304 4305 if len(tokens) != 1: 4306 self.raise_error("Unexpected identifier", self._prev) 4307 4308 if tokens[0].token_type in self.TYPE_TOKENS: 4309 self._prev = tokens[0] 4310 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4311 type_name = identifier.name 4312 4313 while self._match(TokenType.DOT): 4314 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4315 4316 this = exp.DataType.build(type_name, udt=True) 4317 else: 4318 self._retreat(self._index - 1) 4319 return None 4320 else: 4321 return None 4322 4323 type_token = self._prev.token_type 4324 4325 if type_token == TokenType.PSEUDO_TYPE: 4326 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4327 4328 if type_token == TokenType.OBJECT_IDENTIFIER: 4329 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4330 4331 # https://materialize.com/docs/sql/types/map/ 4332 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4333 key_type = self._parse_types( 4334 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4335 ) 4336 if not
self._match(TokenType.FARROW): 4337 self._retreat(index) 4338 return None 4339 4340 value_type = self._parse_types( 4341 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4342 ) 4343 if not self._match(TokenType.R_BRACKET): 4344 self._retreat(index) 4345 return None 4346 4347 return exp.DataType( 4348 this=exp.DataType.Type.MAP, 4349 expressions=[key_type, value_type], 4350 nested=True, 4351 prefix=prefix, 4352 ) 4353 4354 nested = type_token in self.NESTED_TYPE_TOKENS 4355 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4356 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4357 expressions = None 4358 maybe_func = False 4359 4360 if self._match(TokenType.L_PAREN): 4361 if is_struct: 4362 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4363 elif nested: 4364 expressions = self._parse_csv( 4365 lambda: self._parse_types( 4366 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4367 ) 4368 ) 4369 elif type_token in self.ENUM_TYPE_TOKENS: 4370 expressions = self._parse_csv(self._parse_equality) 4371 elif is_aggregate: 4372 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4373 any_token=False, tokens=(TokenType.VAR,) 4374 ) 4375 if not func_or_ident or not self._match(TokenType.COMMA): 4376 return None 4377 expressions = self._parse_csv( 4378 lambda: self._parse_types( 4379 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4380 ) 4381 ) 4382 expressions.insert(0, func_or_ident) 4383 else: 4384 expressions = self._parse_csv(self._parse_type_size) 4385 4386 if not expressions or not self._match(TokenType.R_PAREN): 4387 self._retreat(index) 4388 return None 4389 4390 maybe_func = True 4391 4392 values: t.Optional[t.List[exp.Expression]] = None 4393 4394 if nested and self._match(TokenType.LT): 4395 if is_struct: 4396 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4397 else: 4398 expressions = self._parse_csv( 4399 lambda: self._parse_types( 4400 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4401 ) 4402 ) 4403 4404 if not self._match(TokenType.GT): 4405 self.raise_error("Expecting >") 4406 4407 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4408 values = self._parse_csv(self._parse_assignment) 4409 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4410 4411 if type_token in self.TIMESTAMPS: 4412 if self._match_text_seq("WITH", "TIME", "ZONE"): 4413 maybe_func = False 4414 tz_type = ( 4415 exp.DataType.Type.TIMETZ 4416 if type_token in self.TIMES 4417 else exp.DataType.Type.TIMESTAMPTZ 4418 ) 4419 this = exp.DataType(this=tz_type, expressions=expressions) 4420 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4421 maybe_func = False 4422 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4423 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4424 maybe_func = False 4425 elif type_token == TokenType.INTERVAL: 4426 unit = self._parse_var(upper=True) 4427 if unit: 4428 if self._match_text_seq("TO"): 4429 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4430 4431 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4432 else: 4433 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4434 4435 if maybe_func and check_func: 4436 index2 = self._index 4437 peek = self._parse_string() 4438 4439 if not peek: 4440 self._retreat(index) 4441 return None 4442 4443 
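# [Editorial note -- illustrative example, not part of the original source:
# the DataType nodes produced by _parse_types can also be built directly from
# a type string through exp.DataType.build, which drives this same logic.]
#
#   from sqlglot import exp
#   dt = exp.DataType.build("DECIMAL(38, 0)")
#   assert dt.this == exp.DataType.Type.DECIMAL
#   # dt.expressions holds the precision and scale as DataTypeParam nodes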
self._retreat(index2) 4444 4445 if not this: 4446 if self._match_text_seq("UNSIGNED"): 4447 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4448 if not unsigned_type_token: 4449 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4450 4451 type_token = unsigned_type_token or type_token 4452 4453 this = exp.DataType( 4454 this=exp.DataType.Type[type_token.value], 4455 expressions=expressions, 4456 nested=nested, 4457 values=values, 4458 prefix=prefix, 4459 ) 4460 elif expressions: 4461 this.set("expressions", expressions) 4462 4463 # https://materialize.com/docs/sql/types/list/#type-name 4464 while self._match(TokenType.LIST): 4465 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4466 4467 index = self._index 4468 4469 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4470 matched_array = self._match(TokenType.ARRAY) 4471 4472 while self._curr: 4473 matched_l_bracket = self._match(TokenType.L_BRACKET) 4474 if not matched_l_bracket and not matched_array: 4475 break 4476 4477 matched_array = False 4478 values = self._parse_csv(self._parse_assignment) or None 4479 if values and not schema: 4480 self._retreat(index) 4481 break 4482 4483 this = exp.DataType( 4484 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4485 ) 4486 self._match(TokenType.R_BRACKET) 4487 4488 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4489 converter = self.TYPE_CONVERTERS.get(this.this) 4490 if converter: 4491 this = converter(t.cast(exp.DataType, this)) 4492 4493 return this 4494 4495 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4496 index = self._index 4497 4498 if ( 4499 self._curr 4500 and self._next 4501 and self._curr.token_type in self.TYPE_TOKENS 4502 and self._next.token_type in self.TYPE_TOKENS 4503 ): 4504 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4505 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 4506 this = self._parse_id_var() 4507 else: 4508 this = ( 4509 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4510 or self._parse_id_var() 4511 ) 4512 4513 self._match(TokenType.COLON) 4514 4515 if ( 4516 type_required 4517 and not isinstance(this, exp.DataType) 4518 and not self._match_set(self.TYPE_TOKENS, advance=False) 4519 ): 4520 self._retreat(index) 4521 return self._parse_types() 4522 4523 return self._parse_column_def(this) 4524 4525 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4526 if not self._match_text_seq("AT", "TIME", "ZONE"): 4527 return this 4528 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4529 4530 def _parse_column(self) -> t.Optional[exp.Expression]: 4531 this = self._parse_column_reference() 4532 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4533 4534 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4535 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4536 4537 return column 4538 4539 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4540 this = self._parse_field() 4541 if ( 4542 not this 4543 and self._match(TokenType.VALUES, advance=False) 4544 and self.VALUES_FOLLOWED_BY_PAREN 4545 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4546 ): 4547 this = self._parse_id_var() 4548 4549 if isinstance(this, exp.Identifier): 4550 # We bubble up comments from the Identifier to the Column 4551 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4552 4553 return this 4554 4555 def _parse_colon_as_json_extract( 4556 self, this: t.Optional[exp.Expression] 4557 ) -> t.Optional[exp.Expression]: 4558 casts = [] 4559 json_path = [] 4560 4561 while self._match(TokenType.COLON): 4562 start_index = self._index 4563 4564 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4565 path = self._parse_column_ops( 4566 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4567 ) 4568 4569 # The cast :: operator has a lower precedence than the extraction operator :, so 4570 # we rearrange the AST appropriately to avoid casting the JSON path 4571 while isinstance(path, exp.Cast): 4572 casts.append(path.to) 4573 path = path.this 4574 4575 if casts: 4576 dcolon_offset = next( 4577 i 4578 for i, t in enumerate(self._tokens[start_index:]) 4579 if t.token_type == TokenType.DCOLON 4580 ) 4581 end_token = self._tokens[start_index + dcolon_offset - 1] 4582 else: 4583 end_token = self._prev 4584 4585 if path: 4586 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4587 4588 if json_path: 4589 this = self.expression( 4590 exp.JSONExtract, 4591 this=this, 4592 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4593 ) 4594 4595 while casts: 4596 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4597 4598 return this 4599 4600 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 4601 return self._parse_types() 4602 4603 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4604 this = self._parse_bracket(this) 4605 4606 while self._match_set(self.COLUMN_OPERATORS): 4607 op_token = self._prev.token_type 4608 op = self.COLUMN_OPERATORS.get(op_token) 4609 4610 if op_token == TokenType.DCOLON: 4611 field = self._parse_dcolon() 4612 if not field: 4613 
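# [Editorial note -- illustrative example, not part of the original source:
# the DCOLON branch above is what turns the `::` operator into a Cast node.]
#
#   import sqlglot
#   cast = sqlglot.parse_one("x::INT")
#   assert isinstance(cast, sqlglot.exp.Cast)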
self.raise_error("Expected type") 4614 elif op and self._curr: 4615 field = self._parse_column_reference() 4616 else: 4617 field = self._parse_field(any_token=True, anonymous_func=True) 4618 4619 if isinstance(field, exp.Func) and this: 4620 # bigquery allows function calls like x.y.count(...) 4621 # SAFE.SUBSTR(...) 4622 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4623 this = exp.replace_tree( 4624 this, 4625 lambda n: ( 4626 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4627 if n.table 4628 else n.this 4629 ) 4630 if isinstance(n, exp.Column) 4631 else n, 4632 ) 4633 4634 if op: 4635 this = op(self, this, field) 4636 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4637 this = self.expression( 4638 exp.Column, 4639 this=field, 4640 table=this.this, 4641 db=this.args.get("table"), 4642 catalog=this.args.get("db"), 4643 ) 4644 else: 4645 this = self.expression(exp.Dot, this=this, expression=field) 4646 4647 this = self._parse_bracket(this) 4648 4649 return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this 4650 4651 def _parse_primary(self) -> t.Optional[exp.Expression]: 4652 if self._match_set(self.PRIMARY_PARSERS): 4653 token_type = self._prev.token_type 4654 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4655 4656 if token_type == TokenType.STRING: 4657 expressions = [primary] 4658 while self._match(TokenType.STRING): 4659 expressions.append(exp.Literal.string(self._prev.text)) 4660 4661 if len(expressions) > 1: 4662 return self.expression(exp.Concat, expressions=expressions) 4663 4664 return primary 4665 4666 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4667 return exp.Literal.number(f"0.{self._prev.text}") 4668 4669 if self._match(TokenType.L_PAREN): 4670 comments = self._prev_comments 4671 query = self._parse_select() 4672 4673 if query: 4674 expressions = [query] 4675 else: 4676 expressions = self._parse_expressions() 4677 4678 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4679 4680 if not this and self._match(TokenType.R_PAREN, advance=False): 4681 this = self.expression(exp.Tuple) 4682 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4683 this = self._parse_subquery(this=this, parse_alias=False) 4684 elif isinstance(this, exp.Subquery): 4685 this = self._parse_subquery( 4686 this=self._parse_set_operations(this), parse_alias=False 4687 ) 4688 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4689 this = self.expression(exp.Tuple, expressions=expressions) 4690 else: 4691 this = self.expression(exp.Paren, this=this) 4692 4693 if this: 4694 this.add_comments(comments) 4695 4696 self._match_r_paren(expression=this) 4697 return this 4698 4699 return None 4700 4701 def _parse_field( 4702 self, 4703 any_token: bool = False, 4704 tokens: t.Optional[t.Collection[TokenType]] = None, 4705 anonymous_func: bool = False, 4706 ) -> t.Optional[exp.Expression]: 4707 if anonymous_func: 4708 field = ( 4709 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4710 or self._parse_primary() 4711 ) 4712 else: 4713 field = self._parse_primary() or self._parse_function( 4714 anonymous=anonymous_func, any_token=any_token 4715 ) 4716 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4717 4718 def _parse_function( 4719 self, 4720 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4721 anonymous: bool = False, 4722 optional_parens: bool = True, 4723 any_token: bool = False, 4724 ) -> 
t.Optional[exp.Expression]: 4725 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4726 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4727 fn_syntax = False 4728 if ( 4729 self._match(TokenType.L_BRACE, advance=False) 4730 and self._next 4731 and self._next.text.upper() == "FN" 4732 ): 4733 self._advance(2) 4734 fn_syntax = True 4735 4736 func = self._parse_function_call( 4737 functions=functions, 4738 anonymous=anonymous, 4739 optional_parens=optional_parens, 4740 any_token=any_token, 4741 ) 4742 4743 if fn_syntax: 4744 self._match(TokenType.R_BRACE) 4745 4746 return func 4747 4748 def _parse_function_call( 4749 self, 4750 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4751 anonymous: bool = False, 4752 optional_parens: bool = True, 4753 any_token: bool = False, 4754 ) -> t.Optional[exp.Expression]: 4755 if not self._curr: 4756 return None 4757 4758 comments = self._curr.comments 4759 token_type = self._curr.token_type 4760 this = self._curr.text 4761 upper = this.upper() 4762 4763 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4764 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4765 self._advance() 4766 return self._parse_window(parser(self)) 4767 4768 if not self._next or self._next.token_type != TokenType.L_PAREN: 4769 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4770 self._advance() 4771 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4772 4773 return None 4774 4775 if any_token: 4776 if token_type in self.RESERVED_TOKENS: 4777 return None 4778 elif token_type not in self.FUNC_TOKENS: 4779 return None 4780 4781 self._advance(2) 4782 4783 parser = self.FUNCTION_PARSERS.get(upper) 4784 if parser and not anonymous: 4785 this = parser(self) 4786 else: 4787 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4788 4789 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4790 this = self.expression(subquery_predicate, this=self._parse_select()) 4791 self._match_r_paren() 4792 return this 4793 4794 if functions is None: 4795 functions = self.FUNCTIONS 4796 4797 function = functions.get(upper) 4798 4799 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4800 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4801 4802 if alias: 4803 args = self._kv_to_prop_eq(args) 4804 4805 if function and not anonymous: 4806 if "dialect" in function.__code__.co_varnames: 4807 func = function(args, dialect=self.dialect) 4808 else: 4809 func = function(args) 4810 4811 func = self.validate_expression(func, args) 4812 if not self.dialect.NORMALIZE_FUNCTIONS: 4813 func.meta["name"] = this 4814 4815 this = func 4816 else: 4817 if token_type == TokenType.IDENTIFIER: 4818 this = exp.Identifier(this=this, quoted=True) 4819 this = self.expression(exp.Anonymous, this=this, expressions=args) 4820 4821 if isinstance(this, exp.Expression): 4822 this.add_comments(comments) 4823 4824 self._match_r_paren(this) 4825 return self._parse_window(this) 4826 4827 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4828 transformed = [] 4829 4830 for e in expressions: 4831 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4832 if isinstance(e, exp.Alias): 4833 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4834 4835 if not isinstance(e, exp.PropertyEQ): 4836 e = self.expression( 4837 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4838 ) 4839 4840 
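# [Editorial note -- illustrative example, not part of the original source:
# _parse_function_call above dispatches known names through FUNCTIONS and
# falls back to exp.Anonymous for names it does not recognize (MY_UDF below
# is an arbitrary placeholder).]
#
#   import sqlglot
#   sqlglot.parse_one("SELECT LOG(2, 8)")   # known name: builds exp.Log
#   sqlglot.parse_one("SELECT MY_UDF(1)")   # unknown name: builds exp.Anonymous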
if isinstance(e.this, exp.Column): 4841 e.this.replace(e.this.this) 4842 4843 transformed.append(e) 4844 4845 return transformed 4846 4847 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4848 return self._parse_column_def(self._parse_id_var()) 4849 4850 def _parse_user_defined_function( 4851 self, kind: t.Optional[TokenType] = None 4852 ) -> t.Optional[exp.Expression]: 4853 this = self._parse_id_var() 4854 4855 while self._match(TokenType.DOT): 4856 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4857 4858 if not self._match(TokenType.L_PAREN): 4859 return this 4860 4861 expressions = self._parse_csv(self._parse_function_parameter) 4862 self._match_r_paren() 4863 return self.expression( 4864 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4865 ) 4866 4867 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4868 literal = self._parse_primary() 4869 if literal: 4870 return self.expression(exp.Introducer, this=token.text, expression=literal) 4871 4872 return self.expression(exp.Identifier, this=token.text) 4873 4874 def _parse_session_parameter(self) -> exp.SessionParameter: 4875 kind = None 4876 this = self._parse_id_var() or self._parse_primary() 4877 4878 if this and self._match(TokenType.DOT): 4879 kind = this.name 4880 this = self._parse_var() or self._parse_primary() 4881 4882 return self.expression(exp.SessionParameter, this=this, kind=kind) 4883 4884 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 4885 return self._parse_id_var() 4886 4887 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4888 index = self._index 4889 4890 if self._match(TokenType.L_PAREN): 4891 expressions = t.cast( 4892 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 4893 ) 4894 4895 if not self._match(TokenType.R_PAREN): 4896 self._retreat(index) 4897 else: 4898 expressions = [self._parse_lambda_arg()] 4899 4900 if self._match_set(self.LAMBDAS): 4901 return self.LAMBDAS[self._prev.token_type](self, expressions) 4902 4903 self._retreat(index) 4904 4905 this: t.Optional[exp.Expression] 4906 4907 if self._match(TokenType.DISTINCT): 4908 this = self.expression( 4909 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 4910 ) 4911 else: 4912 this = self._parse_select_or_expression(alias=alias) 4913 4914 return self._parse_limit( 4915 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4916 ) 4917 4918 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4919 index = self._index 4920 if not self._match(TokenType.L_PAREN): 4921 return this 4922 4923 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 4924 # expr can be of both types 4925 if self._match_set(self.SELECT_START_TOKENS): 4926 self._retreat(index) 4927 return this 4928 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4929 self._match_r_paren() 4930 return self.expression(exp.Schema, this=this, expressions=args) 4931 4932 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4933 return self._parse_column_def(self._parse_field(any_token=True)) 4934 4935 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4936 # column defs are not really columns, they're identifiers 4937 if isinstance(this, exp.Column): 4938 this = this.this 4939 4940 kind = self._parse_types(schema=True) 4941 4942 if self._match_text_seq("FOR", "ORDINALITY"): 4943 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4944 4945 constraints: t.List[exp.Expression] = [] 4946 4947 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4948 ("ALIAS", "MATERIALIZED") 4949 ): 4950 persisted = self._prev.text.upper() == "MATERIALIZED" 4951 constraints.append( 4952 self.expression( 4953 exp.ComputedColumnConstraint, 4954 this=self._parse_assignment(), 4955 persisted=persisted or self._match_text_seq("PERSISTED"), 4956 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4957 ) 4958 ) 4959 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4960 self._match(TokenType.ALIAS) 4961 constraints.append( 4962 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4963 ) 4964 4965 while True: 4966 constraint = self._parse_column_constraint() 4967 if not constraint: 4968 break 4969 constraints.append(constraint) 4970 4971 if not kind and not constraints: 4972 return this 4973 4974 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4975 4976 def _parse_auto_increment( 4977 self, 4978 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4979 start = None 4980 increment = None 4981 4982 if self._match(TokenType.L_PAREN, advance=False): 4983 args = self._parse_wrapped_csv(self._parse_bitwise) 4984 start = seq_get(args, 0) 4985 increment = seq_get(args, 1) 4986 elif self._match_text_seq("START"): 4987 start = self._parse_bitwise() 4988 self._match_text_seq("INCREMENT") 4989 increment = self._parse_bitwise() 4990 4991 if start and increment: 4992 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4993 4994 return exp.AutoIncrementColumnConstraint() 4995 4996 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4997 if not self._match_text_seq("REFRESH"): 4998 self._retreat(self._index - 1) 4999 return None 5000 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5001 5002 def _parse_compress(self) -> exp.CompressColumnConstraint: 5003 if self._match(TokenType.L_PAREN, advance=False): 5004 return self.expression( 5005 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5006 ) 5007 5008 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5009 5010 def _parse_generated_as_identity( 5011 self, 5012 ) -> ( 5013 exp.GeneratedAsIdentityColumnConstraint 5014 | exp.ComputedColumnConstraint 5015 | exp.GeneratedAsRowColumnConstraint 5016 ): 5017 if self._match_text_seq("BY", "DEFAULT"): 5018 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5019 this = self.expression( 5020 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5021 ) 5022 else: 5023 self._match_text_seq("ALWAYS") 5024 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5025 5026 self._match(TokenType.ALIAS) 5027 5028 if self._match_text_seq("ROW"): 5029 start = self._match_text_seq("START") 5030 if not start: 5031 self._match(TokenType.END) 5032 hidden = self._match_text_seq("HIDDEN") 5033 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5034 5035 identity = self._match_text_seq("IDENTITY") 5036 5037 if self._match(TokenType.L_PAREN): 5038 if self._match(TokenType.START_WITH): 5039 this.set("start", self._parse_bitwise()) 5040 if self._match_text_seq("INCREMENT", "BY"): 5041 this.set("increment", self._parse_bitwise()) 5042 if self._match_text_seq("MINVALUE"): 5043 this.set("minvalue", self._parse_bitwise()) 5044 if self._match_text_seq("MAXVALUE"): 5045 this.set("maxvalue", self._parse_bitwise()) 5046 5047 if self._match_text_seq("CYCLE"): 5048 this.set("cycle", True) 5049 elif self._match_text_seq("NO", "CYCLE"): 5050 this.set("cycle", False) 5051 5052 if not identity: 5053 this.set("expression", self._parse_range()) 5054 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5055 args = self._parse_csv(self._parse_bitwise) 5056 this.set("start", seq_get(args, 0)) 5057 this.set("increment", seq_get(args, 1)) 5058 5059 self._match_r_paren() 5060 5061 return this 5062 5063 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5064 self._match_text_seq("LENGTH") 5065 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5066 5067 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5068 if self._match_text_seq("NULL"): 5069 return self.expression(exp.NotNullColumnConstraint) 5070 if self._match_text_seq("CASESPECIFIC"): 5071 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5072 if self._match_text_seq("FOR", "REPLICATION"): 5073 return self.expression(exp.NotForReplicationColumnConstraint) 5074 return None 5075 5076 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5077 if self._match(TokenType.CONSTRAINT): 5078 this = self._parse_id_var() 5079 else: 5080 this = None 5081 5082 if self._match_texts(self.CONSTRAINT_PARSERS): 5083 return self.expression( 5084 exp.ColumnConstraint, 5085 this=this, 5086 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5087 ) 5088 5089 return this 5090 5091 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5092 if not self._match(TokenType.CONSTRAINT): 5093 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5094 5095 return self.expression( 5096 exp.Constraint, 5097 this=self._parse_id_var(), 5098 expressions=self._parse_unnamed_constraints(), 5099 ) 5100 5101 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5102 constraints = [] 5103 while True: 5104 constraint = self._parse_unnamed_constraint() or self._parse_function() 5105 if not constraint: 5106 break 5107 constraints.append(constraint) 5108 5109 return constraints 5110 5111 def _parse_unnamed_constraint( 5112 self, constraints: t.Optional[t.Collection[str]] = None 5113 ) -> t.Optional[exp.Expression]: 5114 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5115 constraints or self.CONSTRAINT_PARSERS 5116 ): 5117 return None 5118 5119 constraint = self._prev.text.upper() 5120 if constraint not in self.CONSTRAINT_PARSERS: 5121 
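# [Editorial note -- illustrative example, not part of the original source:
# the constraint parsers in this section attach ColumnConstraint nodes to a
# ColumnDef.]
#
#   import sqlglot
#   ct = sqlglot.parse_one("CREATE TABLE t (x INT NOT NULL UNIQUE)")
#   col = ct.find(sqlglot.exp.ColumnDef)
#   # col.args["constraints"] holds ColumnConstraint nodes whose kinds are
#   # NotNullColumnConstraint and UniqueColumnConstraint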
self.raise_error(f"No parser found for schema constraint {constraint}.") 5122 5123 return self.CONSTRAINT_PARSERS[constraint](self) 5124 5125 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5126 self._match_text_seq("KEY") 5127 return self.expression( 5128 exp.UniqueColumnConstraint, 5129 this=self._parse_schema(self._parse_id_var(any_token=False)), 5130 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5131 on_conflict=self._parse_on_conflict(), 5132 ) 5133 5134 def _parse_key_constraint_options(self) -> t.List[str]: 5135 options = [] 5136 while True: 5137 if not self._curr: 5138 break 5139 5140 if self._match(TokenType.ON): 5141 action = None 5142 on = self._advance_any() and self._prev.text 5143 5144 if self._match_text_seq("NO", "ACTION"): 5145 action = "NO ACTION" 5146 elif self._match_text_seq("CASCADE"): 5147 action = "CASCADE" 5148 elif self._match_text_seq("RESTRICT"): 5149 action = "RESTRICT" 5150 elif self._match_pair(TokenType.SET, TokenType.NULL): 5151 action = "SET NULL" 5152 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5153 action = "SET DEFAULT" 5154 else: 5155 self.raise_error("Invalid key constraint") 5156 5157 options.append(f"ON {on} {action}") 5158 elif self._match_text_seq("NOT", "ENFORCED"): 5159 options.append("NOT ENFORCED") 5160 elif self._match_text_seq("DEFERRABLE"): 5161 options.append("DEFERRABLE") 5162 elif self._match_text_seq("INITIALLY", "DEFERRED"): 5163 options.append("INITIALLY DEFERRED") 5164 elif self._match_text_seq("NORELY"): 5165 options.append("NORELY") 5166 elif self._match_text_seq("MATCH", "FULL"): 5167 options.append("MATCH FULL") 5168 else: 5169 break 5170 5171 return options 5172 5173 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5174 if match and not self._match(TokenType.REFERENCES): 5175 return None 5176 5177 expressions = None 5178 this = self._parse_table(schema=True) 5179 options = self._parse_key_constraint_options() 5180 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5181 5182 def _parse_foreign_key(self) -> exp.ForeignKey: 5183 expressions = self._parse_wrapped_id_vars() 5184 reference = self._parse_references() 5185 options = {} 5186 5187 while self._match(TokenType.ON): 5188 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5189 self.raise_error("Expected DELETE or UPDATE") 5190 5191 kind = self._prev.text.lower() 5192 5193 if self._match_text_seq("NO", "ACTION"): 5194 action = "NO ACTION" 5195 elif self._match(TokenType.SET): 5196 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5197 action = "SET " + self._prev.text.upper() 5198 else: 5199 self._advance() 5200 action = self._prev.text.upper() 5201 5202 options[kind] = action 5203 5204 return self.expression( 5205 exp.ForeignKey, 5206 expressions=expressions, 5207 reference=reference, 5208 **options, # type: ignore 5209 ) 5210 5211 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5212 return self._parse_field() 5213 5214 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5215 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5216 self._retreat(self._index - 1) 5217 return None 5218 5219 id_vars = self._parse_wrapped_id_vars() 5220 return self.expression( 5221 exp.PeriodForSystemTimeConstraint, 5222 this=seq_get(id_vars, 0), 5223 expression=seq_get(id_vars, 1), 5224 ) 5225 5226 def _parse_primary_key( 5227 self, wrapped_optional: bool = False, in_props: bool = False 5228 ) -> 
exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5229 desc = ( 5230 self._match_set((TokenType.ASC, TokenType.DESC)) 5231 and self._prev.token_type == TokenType.DESC 5232 ) 5233 5234 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5235 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5236 5237 expressions = self._parse_wrapped_csv( 5238 self._parse_primary_key_part, optional=wrapped_optional 5239 ) 5240 options = self._parse_key_constraint_options() 5241 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5242 5243 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5244 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5245 5246 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5247 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5248 return this 5249 5250 bracket_kind = self._prev.token_type 5251 expressions = self._parse_csv( 5252 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5253 ) 5254 5255 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5256 self.raise_error("Expected ]") 5257 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5258 self.raise_error("Expected }") 5259 5260 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5261 if bracket_kind == TokenType.L_BRACE: 5262 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5263 elif not this: 5264 this = self.expression(exp.Array, expressions=expressions) 5265 else: 5266 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5267 if constructor_type: 5268 return self.expression(constructor_type, expressions=expressions) 5269 5270 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5271 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5272 5273 self._add_comments(this) 5274 return self._parse_bracket(this) 5275 5276 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5277 if self._match(TokenType.COLON): 5278 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5279 return this 5280 5281 def _parse_case(self) -> t.Optional[exp.Expression]: 5282 ifs = [] 5283 default = None 5284 5285 comments = self._prev_comments 5286 expression = self._parse_assignment() 5287 5288 while self._match(TokenType.WHEN): 5289 this = self._parse_assignment() 5290 self._match(TokenType.THEN) 5291 then = self._parse_assignment() 5292 ifs.append(self.expression(exp.If, this=this, true=then)) 5293 5294 if self._match(TokenType.ELSE): 5295 default = self._parse_assignment() 5296 5297 if not self._match(TokenType.END): 5298 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5299 default = exp.column("interval") 5300 else: 5301 self.raise_error("Expected END after CASE", self._prev) 5302 5303 return self.expression( 5304 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5305 ) 5306 5307 def _parse_if(self) -> t.Optional[exp.Expression]: 5308 if self._match(TokenType.L_PAREN): 5309 args = self._parse_csv(self._parse_assignment) 5310 this = self.validate_expression(exp.If.from_arg_list(args), args) 5311 self._match_r_paren() 5312 else: 5313 index = self._index - 1 5314 5315 if self.NO_PAREN_IF_COMMANDS and index == 0: 5316 return 
self._parse_as_command(self._prev) 5317 5318 condition = self._parse_assignment() 5319 5320 if not condition: 5321 self._retreat(index) 5322 return None 5323 5324 self._match(TokenType.THEN) 5325 true = self._parse_assignment() 5326 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5327 self._match(TokenType.END) 5328 this = self.expression(exp.If, this=condition, true=true, false=false) 5329 5330 return this 5331 5332 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5333 if not self._match_text_seq("VALUE", "FOR"): 5334 self._retreat(self._index - 1) 5335 return None 5336 5337 return self.expression( 5338 exp.NextValueFor, 5339 this=self._parse_column(), 5340 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5341 ) 5342 5343 def _parse_extract(self) -> exp.Extract: 5344 this = self._parse_function() or self._parse_var_or_string(upper=True) 5345 5346 if self._match(TokenType.FROM): 5347 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5348 5349 if not self._match(TokenType.COMMA): 5350 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5351 5352 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5353 5354 def _parse_gap_fill(self) -> exp.GapFill: 5355 self._match(TokenType.TABLE) 5356 this = self._parse_table() 5357 5358 self._match(TokenType.COMMA) 5359 args = [this, *self._parse_csv(self._parse_lambda)] 5360 5361 gap_fill = exp.GapFill.from_arg_list(args) 5362 return self.validate_expression(gap_fill, args) 5363 5364 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5365 this = self._parse_assignment() 5366 5367 if not self._match(TokenType.ALIAS): 5368 if self._match(TokenType.COMMA): 5369 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5370 5371 self.raise_error("Expected AS after CAST") 5372 5373 fmt = None 5374 to = self._parse_types() 5375 5376 if self._match(TokenType.FORMAT): 5377 fmt_string = self._parse_string() 5378 fmt = self._parse_at_time_zone(fmt_string) 5379 5380 if not to: 5381 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5382 if to.this in exp.DataType.TEMPORAL_TYPES: 5383 this = self.expression( 5384 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5385 this=this, 5386 format=exp.Literal.string( 5387 format_time( 5388 fmt_string.this if fmt_string else "", 5389 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5390 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5391 ) 5392 ), 5393 safe=safe, 5394 ) 5395 5396 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5397 this.set("zone", fmt.args["zone"]) 5398 return this 5399 elif not to: 5400 self.raise_error("Expected TYPE after CAST") 5401 elif isinstance(to, exp.Identifier): 5402 to = exp.DataType.build(to.name, udt=True) 5403 elif to.this == exp.DataType.Type.CHAR: 5404 if self._match(TokenType.CHARACTER_SET): 5405 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5406 5407 return self.expression( 5408 exp.Cast if strict else exp.TryCast, 5409 this=this, 5410 to=to, 5411 format=fmt, 5412 safe=safe, 5413 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5414 ) 5415 5416 def _parse_string_agg(self) -> exp.Expression: 5417 if self._match(TokenType.DISTINCT): 5418 args: t.List[t.Optional[exp.Expression]] = [ 5419 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 5420 ] 5421 if 
self._match(TokenType.COMMA): 5422 args.extend(self._parse_csv(self._parse_assignment)) 5423 else: 5424 args = self._parse_csv(self._parse_assignment) # type: ignore 5425 5426 index = self._index 5427 if not self._match(TokenType.R_PAREN) and args: 5428 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5429 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5430 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5431 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5432 5433 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5434 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5435 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5436 if not self._match_text_seq("WITHIN", "GROUP"): 5437 self._retreat(index) 5438 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5439 5440 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5441 order = self._parse_order(this=seq_get(args, 0)) 5442 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5443 5444 def _parse_convert( 5445 self, strict: bool, safe: t.Optional[bool] = None 5446 ) -> t.Optional[exp.Expression]: 5447 this = self._parse_bitwise() 5448 5449 if self._match(TokenType.USING): 5450 to: t.Optional[exp.Expression] = self.expression( 5451 exp.CharacterSet, this=self._parse_var() 5452 ) 5453 elif self._match(TokenType.COMMA): 5454 to = self._parse_types() 5455 else: 5456 to = None 5457 5458 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5459 5460 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5461 """ 5462 There are generally two variants of the DECODE function: 5463 5464 - DECODE(bin, charset) 5465 - DECODE(expression, search, result [, search, result] ... [, default]) 5466 5467 The second variant will always be parsed into a CASE expression. Note that NULL 5468 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5469 instead of relying on pattern matching. 
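        For example (an illustrative sketch of the rewrite):

            DECODE(x, 1, 'one', NULL, 'missing', 'other')

        is parsed as if it were

            CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'missing' ELSE 'other' END

        A non-literal search value `s` instead yields the condition
        `x = s OR (x IS NULL AND s IS NULL)`.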
5470 """ 5471 args = self._parse_csv(self._parse_assignment) 5472 5473 if len(args) < 3: 5474 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5475 5476 expression, *expressions = args 5477 if not expression: 5478 return None 5479 5480 ifs = [] 5481 for search, result in zip(expressions[::2], expressions[1::2]): 5482 if not search or not result: 5483 return None 5484 5485 if isinstance(search, exp.Literal): 5486 ifs.append( 5487 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5488 ) 5489 elif isinstance(search, exp.Null): 5490 ifs.append( 5491 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5492 ) 5493 else: 5494 cond = exp.or_( 5495 exp.EQ(this=expression.copy(), expression=search), 5496 exp.and_( 5497 exp.Is(this=expression.copy(), expression=exp.Null()), 5498 exp.Is(this=search.copy(), expression=exp.Null()), 5499 copy=False, 5500 ), 5501 copy=False, 5502 ) 5503 ifs.append(exp.If(this=cond, true=result)) 5504 5505 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5506 5507 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5508 self._match_text_seq("KEY") 5509 key = self._parse_column() 5510 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5511 self._match_text_seq("VALUE") 5512 value = self._parse_bitwise() 5513 5514 if not key and not value: 5515 return None 5516 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5517 5518 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5519 if not this or not self._match_text_seq("FORMAT", "JSON"): 5520 return this 5521 5522 return self.expression(exp.FormatJson, this=this) 5523 5524 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5525 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5526 for value in values: 5527 if self._match_text_seq(value, "ON", on): 5528 return f"{value} ON {on}" 5529 5530 return None 5531 5532 @t.overload 5533 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5534 5535 @t.overload 5536 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5537 5538 def _parse_json_object(self, agg=False): 5539 star = self._parse_star() 5540 expressions = ( 5541 [star] 5542 if star 5543 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5544 ) 5545 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5546 5547 unique_keys = None 5548 if self._match_text_seq("WITH", "UNIQUE"): 5549 unique_keys = True 5550 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5551 unique_keys = False 5552 5553 self._match_text_seq("KEYS") 5554 5555 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5556 self._parse_type() 5557 ) 5558 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5559 5560 return self.expression( 5561 exp.JSONObjectAgg if agg else exp.JSONObject, 5562 expressions=expressions, 5563 null_handling=null_handling, 5564 unique_keys=unique_keys, 5565 return_type=return_type, 5566 encoding=encoding, 5567 ) 5568 5569 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5570 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5571 if not self._match_text_seq("NESTED"): 5572 this = self._parse_id_var() 5573 kind = self._parse_types(allow_identifiers=False) 5574 nested = None 5575 else: 5576 this = None 5577 kind = None 5578 nested = True 5579 5580 path = self._match_text_seq("PATH") and self._parse_string() 5581 nested_schema = nested and self._parse_json_schema() 5582 5583 return self.expression( 5584 exp.JSONColumnDef, 5585 this=this, 5586 kind=kind, 5587 path=path, 5588 nested_schema=nested_schema, 5589 ) 5590 5591 def _parse_json_schema(self) -> exp.JSONSchema: 5592 self._match_text_seq("COLUMNS") 5593 return self.expression( 5594 exp.JSONSchema, 5595 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5596 ) 5597 5598 def _parse_json_table(self) -> exp.JSONTable: 5599 this = self._parse_format_json(self._parse_bitwise()) 5600 path = self._match(TokenType.COMMA) and self._parse_string() 5601 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5602 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5603 schema = self._parse_json_schema() 5604 5605 return exp.JSONTable( 5606 this=this, 5607 schema=schema, 5608 path=path, 5609 error_handling=error_handling, 5610 empty_handling=empty_handling, 5611 ) 5612 5613 def _parse_match_against(self) -> exp.MatchAgainst: 5614 expressions = self._parse_csv(self._parse_column) 5615 5616 self._match_text_seq(")", "AGAINST", "(") 5617 5618 this = self._parse_string() 5619 5620 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5621 modifier = "IN NATURAL LANGUAGE MODE" 5622 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5623 modifier = f"{modifier} WITH QUERY EXPANSION" 5624 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5625 modifier = "IN BOOLEAN MODE" 5626 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5627 modifier = "WITH QUERY EXPANSION" 5628 else: 5629 modifier = None 5630 5631 return self.expression( 5632 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5633 ) 5634 5635 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5636 def _parse_open_json(self) -> exp.OpenJSON: 5637 this = self._parse_bitwise() 5638 path = self._match(TokenType.COMMA) and self._parse_string() 5639 5640 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5641 this = self._parse_field(any_token=True) 5642 kind = self._parse_types() 5643 path = 
self._parse_string() 5644 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5645 5646 return self.expression( 5647 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5648 ) 5649 5650 expressions = None 5651 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5652 self._match_l_paren() 5653 expressions = self._parse_csv(_parse_open_json_column_def) 5654 5655 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5656 5657 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5658 args = self._parse_csv(self._parse_bitwise) 5659 5660 if self._match(TokenType.IN): 5661 return self.expression( 5662 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5663 ) 5664 5665 if haystack_first: 5666 haystack = seq_get(args, 0) 5667 needle = seq_get(args, 1) 5668 else: 5669 needle = seq_get(args, 0) 5670 haystack = seq_get(args, 1) 5671 5672 return self.expression( 5673 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5674 ) 5675 5676 def _parse_predict(self) -> exp.Predict: 5677 self._match_text_seq("MODEL") 5678 this = self._parse_table() 5679 5680 self._match(TokenType.COMMA) 5681 self._match_text_seq("TABLE") 5682 5683 return self.expression( 5684 exp.Predict, 5685 this=this, 5686 expression=self._parse_table(), 5687 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5688 ) 5689 5690 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5691 args = self._parse_csv(self._parse_table) 5692 return exp.JoinHint(this=func_name.upper(), expressions=args) 5693 5694 def _parse_substring(self) -> exp.Substring: 5695 # Postgres supports the form: substring(string [from int] [for int]) 5696 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5697 5698 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5699 5700 if self._match(TokenType.FROM): 5701 args.append(self._parse_bitwise()) 5702 if self._match(TokenType.FOR): 5703 if len(args) == 1: 5704 args.append(exp.Literal.number(1)) 5705 args.append(self._parse_bitwise()) 5706 5707 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5708 5709 def _parse_trim(self) -> exp.Trim: 5710 # https://www.w3resource.com/sql/character-functions/trim.php 5711 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5712 5713 position = None 5714 collation = None 5715 expression = None 5716 5717 if self._match_texts(self.TRIM_TYPES): 5718 position = self._prev.text.upper() 5719 5720 this = self._parse_bitwise() 5721 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5722 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5723 expression = self._parse_bitwise() 5724 5725 if invert_order: 5726 this, expression = expression, this 5727 5728 if self._match(TokenType.COLLATE): 5729 collation = self._parse_bitwise() 5730 5731 return self.expression( 5732 exp.Trim, this=this, position=position, expression=expression, collation=collation 5733 ) 5734 5735 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5736 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5737 5738 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5739 return self._parse_window(self._parse_id_var(), alias=True) 5740 5741 def _parse_respect_or_ignore_nulls( 5742 self, this: t.Optional[exp.Expression] 5743 ) -> t.Optional[exp.Expression]: 5744 if self._match_text_seq("IGNORE", "NULLS"): 
5745 return self.expression(exp.IgnoreNulls, this=this) 5746 if self._match_text_seq("RESPECT", "NULLS"): 5747 return self.expression(exp.RespectNulls, this=this) 5748 return this 5749 5750 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5751 if self._match(TokenType.HAVING): 5752 self._match_texts(("MAX", "MIN")) 5753 max = self._prev.text.upper() != "MIN" 5754 return self.expression( 5755 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5756 ) 5757 5758 return this 5759 5760 def _parse_window( 5761 self, this: t.Optional[exp.Expression], alias: bool = False 5762 ) -> t.Optional[exp.Expression]: 5763 func = this 5764 comments = func.comments if isinstance(func, exp.Expression) else None 5765 5766 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5767 self._match(TokenType.WHERE) 5768 this = self.expression( 5769 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5770 ) 5771 self._match_r_paren() 5772 5773 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5774 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5775 if self._match_text_seq("WITHIN", "GROUP"): 5776 order = self._parse_wrapped(self._parse_order) 5777 this = self.expression(exp.WithinGroup, this=this, expression=order) 5778 5779 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5780 # Some dialects choose to implement and some do not. 5781 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5782 5783 # There is some code above in _parse_lambda that handles 5784 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5785 5786 # The below changes handle 5787 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5788 5789 # Oracle allows both formats 5790 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5791 # and Snowflake chose to do the same for familiarity 5792 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5793 if isinstance(this, exp.AggFunc): 5794 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5795 5796 if ignore_respect and ignore_respect is not this: 5797 ignore_respect.replace(ignore_respect.this) 5798 this = self.expression(ignore_respect.__class__, this=this) 5799 5800 this = self._parse_respect_or_ignore_nulls(this) 5801 5802 # bigquery select from window x AS (partition by ...) 
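        # An illustrative BigQuery-style example: in
        #   SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y)
        # _parse_named_window re-enters this method with alias=True so that the
        # branch below parses the `w AS (PARTITION BY y)` definition.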
5803 if alias: 5804 over = None 5805 self._match(TokenType.ALIAS) 5806 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5807 return this 5808 else: 5809 over = self._prev.text.upper() 5810 5811 if comments and isinstance(func, exp.Expression): 5812 func.pop_comments() 5813 5814 if not self._match(TokenType.L_PAREN): 5815 return self.expression( 5816 exp.Window, 5817 comments=comments, 5818 this=this, 5819 alias=self._parse_id_var(False), 5820 over=over, 5821 ) 5822 5823 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5824 5825 first = self._match(TokenType.FIRST) 5826 if self._match_text_seq("LAST"): 5827 first = False 5828 5829 partition, order = self._parse_partition_and_order() 5830 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5831 5832 if kind: 5833 self._match(TokenType.BETWEEN) 5834 start = self._parse_window_spec() 5835 self._match(TokenType.AND) 5836 end = self._parse_window_spec() 5837 5838 spec = self.expression( 5839 exp.WindowSpec, 5840 kind=kind, 5841 start=start["value"], 5842 start_side=start["side"], 5843 end=end["value"], 5844 end_side=end["side"], 5845 ) 5846 else: 5847 spec = None 5848 5849 self._match_r_paren() 5850 5851 window = self.expression( 5852 exp.Window, 5853 comments=comments, 5854 this=this, 5855 partition_by=partition, 5856 order=order, 5857 spec=spec, 5858 alias=window_alias, 5859 over=over, 5860 first=first, 5861 ) 5862 5863 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5864 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5865 return self._parse_window(window, alias=alias) 5866 5867 return window 5868 5869 def _parse_partition_and_order( 5870 self, 5871 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5872 return self._parse_partition_by(), self._parse_order() 5873 5874 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5875 self._match(TokenType.BETWEEN) 5876 5877 return { 5878 "value": ( 5879 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5880 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5881 or self._parse_bitwise() 5882 ), 5883 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5884 } 5885 5886 def _parse_alias( 5887 self, this: t.Optional[exp.Expression], explicit: bool = False 5888 ) -> t.Optional[exp.Expression]: 5889 any_token = self._match(TokenType.ALIAS) 5890 comments = self._prev_comments or [] 5891 5892 if explicit and not any_token: 5893 return this 5894 5895 if self._match(TokenType.L_PAREN): 5896 aliases = self.expression( 5897 exp.Aliases, 5898 comments=comments, 5899 this=this, 5900 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5901 ) 5902 self._match_r_paren(aliases) 5903 return aliases 5904 5905 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5906 self.STRING_ALIASES and self._parse_string_as_identifier() 5907 ) 5908 5909 if alias: 5910 comments.extend(alias.pop_comments()) 5911 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5912 column = this.this 5913 5914 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5915 if not this.comments and column and column.comments: 5916 this.comments = column.pop_comments() 5917 5918 return this 5919 5920 def _parse_id_var( 5921 self, 5922 any_token: bool = True, 5923 tokens: t.Optional[t.Collection[TokenType]] = None, 5924 ) -> t.Optional[exp.Expression]: 5925 expression = self._parse_identifier() 5926 if 
not expression and ( 5927 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5928 ): 5929 quoted = self._prev.token_type == TokenType.STRING 5930 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5931 5932 return expression 5933 5934 def _parse_string(self) -> t.Optional[exp.Expression]: 5935 if self._match_set(self.STRING_PARSERS): 5936 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5937 return self._parse_placeholder() 5938 5939 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5940 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5941 5942 def _parse_number(self) -> t.Optional[exp.Expression]: 5943 if self._match_set(self.NUMERIC_PARSERS): 5944 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5945 return self._parse_placeholder() 5946 5947 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5948 if self._match(TokenType.IDENTIFIER): 5949 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5950 return self._parse_placeholder() 5951 5952 def _parse_var( 5953 self, 5954 any_token: bool = False, 5955 tokens: t.Optional[t.Collection[TokenType]] = None, 5956 upper: bool = False, 5957 ) -> t.Optional[exp.Expression]: 5958 if ( 5959 (any_token and self._advance_any()) 5960 or self._match(TokenType.VAR) 5961 or (self._match_set(tokens) if tokens else False) 5962 ): 5963 return self.expression( 5964 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5965 ) 5966 return self._parse_placeholder() 5967 5968 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5969 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5970 self._advance() 5971 return self._prev 5972 return None 5973 5974 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 5975 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 5976 5977 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5978 return self._parse_primary() or self._parse_var(any_token=True) 5979 5980 def _parse_null(self) -> t.Optional[exp.Expression]: 5981 if self._match_set(self.NULL_TOKENS): 5982 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5983 return self._parse_placeholder() 5984 5985 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5986 if self._match(TokenType.TRUE): 5987 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5988 if self._match(TokenType.FALSE): 5989 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5990 return self._parse_placeholder() 5991 5992 def _parse_star(self) -> t.Optional[exp.Expression]: 5993 if self._match(TokenType.STAR): 5994 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5995 return self._parse_placeholder() 5996 5997 def _parse_parameter(self) -> exp.Parameter: 5998 this = self._parse_identifier() or self._parse_primary_or_var() 5999 return self.expression(exp.Parameter, this=this) 6000 6001 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6002 if self._match_set(self.PLACEHOLDER_PARSERS): 6003 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6004 if placeholder: 6005 return placeholder 6006 self._advance(-1) 6007 return None 6008 6009 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6010 if not self._match_texts(keywords): 6011 return None 6012 if self._match(TokenType.L_PAREN, 
advance=False): 6013 return self._parse_wrapped_csv(self._parse_expression) 6014 6015 expression = self._parse_expression() 6016 return [expression] if expression else None 6017 6018 def _parse_csv( 6019 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6020 ) -> t.List[exp.Expression]: 6021 parse_result = parse_method() 6022 items = [parse_result] if parse_result is not None else [] 6023 6024 while self._match(sep): 6025 self._add_comments(parse_result) 6026 parse_result = parse_method() 6027 if parse_result is not None: 6028 items.append(parse_result) 6029 6030 return items 6031 6032 def _parse_tokens( 6033 self, parse_method: t.Callable, expressions: t.Dict 6034 ) -> t.Optional[exp.Expression]: 6035 this = parse_method() 6036 6037 while self._match_set(expressions): 6038 this = self.expression( 6039 expressions[self._prev.token_type], 6040 this=this, 6041 comments=self._prev_comments, 6042 expression=parse_method(), 6043 ) 6044 6045 return this 6046 6047 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6048 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6049 6050 def _parse_wrapped_csv( 6051 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6052 ) -> t.List[exp.Expression]: 6053 return self._parse_wrapped( 6054 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6055 ) 6056 6057 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6058 wrapped = self._match(TokenType.L_PAREN) 6059 if not wrapped and not optional: 6060 self.raise_error("Expecting (") 6061 parse_result = parse_method() 6062 if wrapped: 6063 self._match_r_paren() 6064 return parse_result 6065 6066 def _parse_expressions(self) -> t.List[exp.Expression]: 6067 return self._parse_csv(self._parse_expression) 6068 6069 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6070 return self._parse_select() or self._parse_set_operations( 6071 self._parse_expression() if alias else self._parse_assignment() 6072 ) 6073 6074 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6075 return self._parse_query_modifiers( 6076 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6077 ) 6078 6079 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6080 this = None 6081 if self._match_texts(self.TRANSACTION_KIND): 6082 this = self._prev.text 6083 6084 self._match_texts(("TRANSACTION", "WORK")) 6085 6086 modes = [] 6087 while True: 6088 mode = [] 6089 while self._match(TokenType.VAR): 6090 mode.append(self._prev.text) 6091 6092 if mode: 6093 modes.append(" ".join(mode)) 6094 if not self._match(TokenType.COMMA): 6095 break 6096 6097 return self.expression(exp.Transaction, this=this, modes=modes) 6098 6099 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6100 chain = None 6101 savepoint = None 6102 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6103 6104 self._match_texts(("TRANSACTION", "WORK")) 6105 6106 if self._match_text_seq("TO"): 6107 self._match_text_seq("SAVEPOINT") 6108 savepoint = self._parse_id_var() 6109 6110 if self._match(TokenType.AND): 6111 chain = not self._match_text_seq("NO") 6112 self._match_text_seq("CHAIN") 6113 6114 if is_rollback: 6115 return self.expression(exp.Rollback, savepoint=savepoint) 6116 6117 return self.expression(exp.Commit, chain=chain) 6118 6119 def _parse_refresh(self) -> exp.Refresh: 6120 self._match(TokenType.TABLE) 6121 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6122 6123 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6124 if not self._match_text_seq("ADD"): 6125 return None 6126 6127 self._match(TokenType.COLUMN) 6128 exists_column = self._parse_exists(not_=True) 6129 expression = self._parse_field_def() 6130 6131 if expression: 6132 expression.set("exists", exists_column) 6133 6134 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6135 if self._match_texts(("FIRST", "AFTER")): 6136 position = self._prev.text 6137 column_position = self.expression( 6138 exp.ColumnPosition, this=self._parse_column(), position=position 6139 ) 6140 expression.set("position", column_position) 6141 6142 return expression 6143 6144 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6145 drop = self._match(TokenType.DROP) and self._parse_drop() 6146 if drop and not isinstance(drop, exp.Command): 6147 drop.set("kind", drop.args.get("kind", "COLUMN")) 6148 return drop 6149 6150 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6151 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6152 return self.expression( 6153 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6154 ) 6155 6156 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6157 index = self._index - 1 6158 6159 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6160 return self._parse_csv( 6161 lambda: self.expression( 6162 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6163 ) 6164 ) 6165 6166 self._retreat(index) 6167 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6168 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6169 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6170 6171 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6172 if self._match_texts(self.ALTER_ALTER_PARSERS): 6173 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6174 6175 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6176 # keyword after ALTER we default to parsing this statement 6177 self._match(TokenType.COLUMN) 6178 column = self._parse_field(any_token=True) 6179 6180 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6181 return self.expression(exp.AlterColumn, this=column, drop=True) 6182 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6183 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6184 if self._match(TokenType.COMMENT): 6185 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6186 if self._match_text_seq("DROP", "NOT", "NULL"): 6187 return self.expression( 6188 exp.AlterColumn, 6189 this=column, 6190 drop=True, 6191 allow_null=True, 6192 ) 6193 if self._match_text_seq("SET", "NOT", "NULL"): 6194 return self.expression( 6195 exp.AlterColumn, 6196 this=column, 6197 allow_null=False, 6198 ) 6199 self._match_text_seq("SET", "DATA") 6200 self._match_text_seq("TYPE") 6201 return self.expression( 6202 exp.AlterColumn, 6203 this=column, 6204 dtype=self._parse_types(), 6205 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6206 using=self._match(TokenType.USING) and self._parse_assignment(), 6207 ) 6208 6209 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6210 if self._match_texts(("ALL", "EVEN", "AUTO")): 6211 
return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6212 6213 self._match_text_seq("KEY", "DISTKEY") 6214 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6215 6216 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6217 if compound: 6218 self._match_text_seq("SORTKEY") 6219 6220 if self._match(TokenType.L_PAREN, advance=False): 6221 return self.expression( 6222 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6223 ) 6224 6225 self._match_texts(("AUTO", "NONE")) 6226 return self.expression( 6227 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6228 ) 6229 6230 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6231 index = self._index - 1 6232 6233 partition_exists = self._parse_exists() 6234 if self._match(TokenType.PARTITION, advance=False): 6235 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6236 6237 self._retreat(index) 6238 return self._parse_csv(self._parse_drop_column) 6239 6240 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6241 if self._match(TokenType.COLUMN): 6242 exists = self._parse_exists() 6243 old_column = self._parse_column() 6244 to = self._match_text_seq("TO") 6245 new_column = self._parse_column() 6246 6247 if old_column is None or to is None or new_column is None: 6248 return None 6249 6250 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6251 6252 self._match_text_seq("TO") 6253 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6254 6255 def _parse_alter_table_set(self) -> exp.AlterSet: 6256 alter_set = self.expression(exp.AlterSet) 6257 6258 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6259 "TABLE", "PROPERTIES" 6260 ): 6261 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6262 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6263 alter_set.set("expressions", [self._parse_assignment()]) 6264 elif self._match_texts(("LOGGED", "UNLOGGED")): 6265 alter_set.set("option", exp.var(self._prev.text.upper())) 6266 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6267 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6268 elif self._match_text_seq("LOCATION"): 6269 alter_set.set("location", self._parse_field()) 6270 elif self._match_text_seq("ACCESS", "METHOD"): 6271 alter_set.set("access_method", self._parse_field()) 6272 elif self._match_text_seq("TABLESPACE"): 6273 alter_set.set("tablespace", self._parse_field()) 6274 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6275 alter_set.set("file_format", [self._parse_field()]) 6276 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6277 alter_set.set("file_format", self._parse_wrapped_options()) 6278 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6279 alter_set.set("copy_options", self._parse_wrapped_options()) 6280 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6281 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6282 else: 6283 if self._match_text_seq("SERDE"): 6284 alter_set.set("serde", self._parse_field()) 6285 6286 alter_set.set("expressions", [self._parse_properties()]) 6287 6288 return alter_set 6289 6290 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6291 start = self._prev 6292 6293 if not self._match(TokenType.TABLE): 6294 return 
self._parse_as_command(start) 6295 6296 exists = self._parse_exists() 6297 only = self._match_text_seq("ONLY") 6298 this = self._parse_table(schema=True) 6299 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6300 6301 if self._next: 6302 self._advance() 6303 6304 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6305 if parser: 6306 actions = ensure_list(parser(self)) 6307 options = self._parse_csv(self._parse_property) 6308 6309 if not self._curr and actions: 6310 return self.expression( 6311 exp.AlterTable, 6312 this=this, 6313 exists=exists, 6314 actions=actions, 6315 only=only, 6316 options=options, 6317 cluster=cluster, 6318 ) 6319 6320 return self._parse_as_command(start) 6321 6322 def _parse_merge(self) -> exp.Merge: 6323 self._match(TokenType.INTO) 6324 target = self._parse_table() 6325 6326 if target and self._match(TokenType.ALIAS, advance=False): 6327 target.set("alias", self._parse_table_alias()) 6328 6329 self._match(TokenType.USING) 6330 using = self._parse_table() 6331 6332 self._match(TokenType.ON) 6333 on = self._parse_assignment() 6334 6335 return self.expression( 6336 exp.Merge, 6337 this=target, 6338 using=using, 6339 on=on, 6340 expressions=self._parse_when_matched(), 6341 ) 6342 6343 def _parse_when_matched(self) -> t.List[exp.When]: 6344 whens = [] 6345 6346 while self._match(TokenType.WHEN): 6347 matched = not self._match(TokenType.NOT) 6348 self._match_text_seq("MATCHED") 6349 source = ( 6350 False 6351 if self._match_text_seq("BY", "TARGET") 6352 else self._match_text_seq("BY", "SOURCE") 6353 ) 6354 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6355 6356 self._match(TokenType.THEN) 6357 6358 if self._match(TokenType.INSERT): 6359 _this = self._parse_star() 6360 if _this: 6361 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6362 else: 6363 then = self.expression( 6364 exp.Insert, 6365 this=self._parse_value(), 6366 expression=self._match_text_seq("VALUES") and self._parse_value(), 6367 ) 6368 elif self._match(TokenType.UPDATE): 6369 expressions = self._parse_star() 6370 if expressions: 6371 then = self.expression(exp.Update, expressions=expressions) 6372 else: 6373 then = self.expression( 6374 exp.Update, 6375 expressions=self._match(TokenType.SET) 6376 and self._parse_csv(self._parse_equality), 6377 ) 6378 elif self._match(TokenType.DELETE): 6379 then = self.expression(exp.Var, this=self._prev.text) 6380 else: 6381 then = None 6382 6383 whens.append( 6384 self.expression( 6385 exp.When, 6386 matched=matched, 6387 source=source, 6388 condition=condition, 6389 then=then, 6390 ) 6391 ) 6392 return whens 6393 6394 def _parse_show(self) -> t.Optional[exp.Expression]: 6395 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6396 if parser: 6397 return parser(self) 6398 return self._parse_as_command(self._prev) 6399 6400 def _parse_set_item_assignment( 6401 self, kind: t.Optional[str] = None 6402 ) -> t.Optional[exp.Expression]: 6403 index = self._index 6404 6405 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6406 return self._parse_set_transaction(global_=kind == "GLOBAL") 6407 6408 left = self._parse_primary() or self._parse_column() 6409 assignment_delimiter = self._match_texts(("=", "TO")) 6410 6411 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6412 self._retreat(index) 6413 return None 6414 6415 right = self._parse_statement() or self._parse_id_var() 6416 if isinstance(right, 
(exp.Column, exp.Identifier)): 6417 right = exp.var(right.name) 6418 6419 this = self.expression(exp.EQ, this=left, expression=right) 6420 return self.expression(exp.SetItem, this=this, kind=kind) 6421 6422 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6423 self._match_text_seq("TRANSACTION") 6424 characteristics = self._parse_csv( 6425 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6426 ) 6427 return self.expression( 6428 exp.SetItem, 6429 expressions=characteristics, 6430 kind="TRANSACTION", 6431 **{"global": global_}, # type: ignore 6432 ) 6433 6434 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6435 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6436 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6437 6438 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6439 index = self._index 6440 set_ = self.expression( 6441 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6442 ) 6443 6444 if self._curr: 6445 self._retreat(index) 6446 return self._parse_as_command(self._prev) 6447 6448 return set_ 6449 6450 def _parse_var_from_options( 6451 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6452 ) -> t.Optional[exp.Var]: 6453 start = self._curr 6454 if not start: 6455 return None 6456 6457 option = start.text.upper() 6458 continuations = options.get(option) 6459 6460 index = self._index 6461 self._advance() 6462 for keywords in continuations or []: 6463 if isinstance(keywords, str): 6464 keywords = (keywords,) 6465 6466 if self._match_text_seq(*keywords): 6467 option = f"{option} {' '.join(keywords)}" 6468 break 6469 else: 6470 if continuations or continuations is None: 6471 if raise_unmatched: 6472 self.raise_error(f"Unknown option {option}") 6473 6474 self._retreat(index) 6475 return None 6476 6477 return exp.var(option) 6478 6479 def _parse_as_command(self, start: Token) -> exp.Command: 6480 while self._curr: 6481 self._advance() 6482 text = self._find_sql(start, self._prev) 6483 size = len(start.text) 6484 self._warn_unsupported() 6485 return exp.Command(this=text[:size], expression=text[size:]) 6486 6487 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6488 settings = [] 6489 6490 self._match_l_paren() 6491 kind = self._parse_id_var() 6492 6493 if self._match(TokenType.L_PAREN): 6494 while True: 6495 key = self._parse_id_var() 6496 value = self._parse_primary() 6497 6498 if not key and value is None: 6499 break 6500 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6501 self._match(TokenType.R_PAREN) 6502 6503 self._match_r_paren() 6504 6505 return self.expression( 6506 exp.DictProperty, 6507 this=this, 6508 kind=kind.this if kind else None, 6509 settings=settings, 6510 ) 6511 6512 def _parse_dict_range(self, this: str) -> exp.DictRange: 6513 self._match_l_paren() 6514 has_min = self._match_text_seq("MIN") 6515 if has_min: 6516 min = self._parse_var() or self._parse_primary() 6517 self._match_text_seq("MAX") 6518 max = self._parse_var() or self._parse_primary() 6519 else: 6520 max = self._parse_var() or self._parse_primary() 6521 min = exp.Literal.number(0) 6522 self._match_r_paren() 6523 return self.expression(exp.DictRange, this=this, min=min, max=max) 6524 6525 def _parse_comprehension( 6526 self, this: t.Optional[exp.Expression] 6527 ) -> t.Optional[exp.Comprehension]: 6528 index = self._index 6529 expression = self._parse_column() 6530 if not 
self._match(TokenType.IN): 6531 self._retreat(index - 1) 6532 return None 6533 iterator = self._parse_column() 6534 condition = self._parse_assignment() if self._match_text_seq("IF") else None 6535 return self.expression( 6536 exp.Comprehension, 6537 this=this, 6538 expression=expression, 6539 iterator=iterator, 6540 condition=condition, 6541 ) 6542 6543 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6544 if self._match(TokenType.HEREDOC_STRING): 6545 return self.expression(exp.Heredoc, this=self._prev.text) 6546 6547 if not self._match_text_seq("$"): 6548 return None 6549 6550 tags = ["$"] 6551 tag_text = None 6552 6553 if self._is_connected(): 6554 self._advance() 6555 tags.append(self._prev.text.upper()) 6556 else: 6557 self.raise_error("No closing $ found") 6558 6559 if tags[-1] != "$": 6560 if self._is_connected() and self._match_text_seq("$"): 6561 tag_text = tags[-1] 6562 tags.append("$") 6563 else: 6564 self.raise_error("No closing $ found") 6565 6566 heredoc_start = self._curr 6567 6568 while self._curr: 6569 if self._match_text_seq(*tags, advance=False): 6570 this = self._find_sql(heredoc_start, self._prev) 6571 self._advance(len(tags)) 6572 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6573 6574 self._advance() 6575 6576 self.raise_error(f"No closing {''.join(tags)} found") 6577 return None 6578 6579 def _find_parser( 6580 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6581 ) -> t.Optional[t.Callable]: 6582 if not self._curr: 6583 return None 6584 6585 index = self._index 6586 this = [] 6587 while True: 6588 # The current token might be multiple words 6589 curr = self._curr.text.upper() 6590 key = curr.split(" ") 6591 this.append(curr) 6592 6593 self._advance() 6594 result, trie = in_trie(trie, key) 6595 if result == TrieResult.FAILED: 6596 break 6597 6598 if result == TrieResult.EXISTS: 6599 subparser = parsers[" ".join(this)] 6600 return subparser 6601 6602 self._retreat(index) 6603 return None 6604 6605 def _match(self, token_type, advance=True, expression=None): 6606 if not self._curr: 6607 return None 6608 6609 if self._curr.token_type == token_type: 6610 if advance: 6611 self._advance() 6612 self._add_comments(expression) 6613 return True 6614 6615 return None 6616 6617 def _match_set(self, types, advance=True): 6618 if not self._curr: 6619 return None 6620 6621 if self._curr.token_type in types: 6622 if advance: 6623 self._advance() 6624 return True 6625 6626 return None 6627 6628 def _match_pair(self, token_type_a, token_type_b, advance=True): 6629 if not self._curr or not self._next: 6630 return None 6631 6632 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6633 if advance: 6634 self._advance(2) 6635 return True 6636 6637 return None 6638 6639 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6640 if not self._match(TokenType.L_PAREN, expression=expression): 6641 self.raise_error("Expecting (") 6642 6643 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6644 if not self._match(TokenType.R_PAREN, expression=expression): 6645 self.raise_error("Expecting )") 6646 6647 def _match_texts(self, texts, advance=True): 6648 if self._curr and self._curr.text.upper() in texts: 6649 if advance: 6650 self._advance() 6651 return True 6652 return None 6653 6654 def _match_text_seq(self, *texts, advance=True): 6655 index = self._index 6656 for text in texts: 6657 if self._curr and self._curr.text.upper() == text: 6658 self._advance() 6659 else: 6660 
self._retreat(index) 6661 return None 6662 6663 if not advance: 6664 self._retreat(index) 6665 6666 return True 6667 6668 def _replace_lambda( 6669 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6670 ) -> t.Optional[exp.Expression]: 6671 if not node: 6672 return node 6673 6674 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6675 6676 for column in node.find_all(exp.Column): 6677 typ = lambda_types.get(column.parts[0].name) 6678 if typ is not None: 6679 dot_or_id = column.to_dot() if column.table else column.this 6680 6681 if typ: 6682 dot_or_id = self.expression( 6683 exp.Cast, 6684 this=dot_or_id, 6685 to=typ, 6686 ) 6687 6688 parent = column.parent 6689 6690 while isinstance(parent, exp.Dot): 6691 if not isinstance(parent.parent, exp.Dot): 6692 parent.replace(dot_or_id) 6693 break 6694 parent = parent.parent 6695 else: 6696 if column is node: 6697 node = dot_or_id 6698 else: 6699 column.replace(dot_or_id) 6700 return node 6701 6702 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6703 start = self._prev 6704 6705 # Not to be confused with TRUNCATE(number, decimals) function call 6706 if self._match(TokenType.L_PAREN): 6707 self._retreat(self._index - 2) 6708 return self._parse_function() 6709 6710 # Clickhouse supports TRUNCATE DATABASE as well 6711 is_database = self._match(TokenType.DATABASE) 6712 6713 self._match(TokenType.TABLE) 6714 6715 exists = self._parse_exists(not_=False) 6716 6717 expressions = self._parse_csv( 6718 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6719 ) 6720 6721 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6722 6723 if self._match_text_seq("RESTART", "IDENTITY"): 6724 identity = "RESTART" 6725 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6726 identity = "CONTINUE" 6727 else: 6728 identity = None 6729 6730 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6731 option = self._prev.text 6732 else: 6733 option = None 6734 6735 partition = self._parse_partition() 6736 6737 # Fallback case 6738 if self._curr: 6739 return self._parse_as_command(start) 6740 6741 return self.expression( 6742 exp.TruncateTable, 6743 expressions=expressions, 6744 is_database=is_database, 6745 exists=exists, 6746 cluster=cluster, 6747 identity=identity, 6748 option=option, 6749 partition=partition, 6750 ) 6751 6752 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6753 this = self._parse_ordered(self._parse_opclass) 6754 6755 if not self._match(TokenType.WITH): 6756 return this 6757 6758 op = self._parse_var(any_token=True) 6759 6760 return self.expression(exp.WithOperator, this=this, op=op) 6761 6762 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6763 self._match(TokenType.EQ) 6764 self._match(TokenType.L_PAREN) 6765 6766 opts: t.List[t.Optional[exp.Expression]] = [] 6767 while self._curr and not self._match(TokenType.R_PAREN): 6768 if self._match_text_seq("FORMAT_NAME", "="): 6769 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 6770 # so we parse it separately to use _parse_field() 6771 prop = self.expression( 6772 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 6773 ) 6774 opts.append(prop) 6775 else: 6776 opts.append(self._parse_property()) 6777 6778 self._match(TokenType.COMMA) 6779 6780 return opts 6781 6782 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6783 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else 
None 6784 6785 options = [] 6786 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6787 option = self._parse_var(any_token=True) 6788 prev = self._prev.text.upper() 6789 6790 # Different dialects might separate options and values by white space, "=" and "AS" 6791 self._match(TokenType.EQ) 6792 self._match(TokenType.ALIAS) 6793 6794 param = self.expression(exp.CopyParameter, this=option) 6795 6796 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 6797 TokenType.L_PAREN, advance=False 6798 ): 6799 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 6800 param.set("expressions", self._parse_wrapped_options()) 6801 elif prev == "FILE_FORMAT": 6802 # T-SQL's external file format case 6803 param.set("expression", self._parse_field()) 6804 else: 6805 param.set("expression", self._parse_unquoted_field()) 6806 6807 options.append(param) 6808 self._match(sep) 6809 6810 return options 6811 6812 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6813 expr = self.expression(exp.Credentials) 6814 6815 if self._match_text_seq("STORAGE_INTEGRATION", "="): 6816 expr.set("storage", self._parse_field()) 6817 if self._match_text_seq("CREDENTIALS"): 6818 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 6819 creds = ( 6820 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6821 ) 6822 expr.set("credentials", creds) 6823 if self._match_text_seq("ENCRYPTION"): 6824 expr.set("encryption", self._parse_wrapped_options()) 6825 if self._match_text_seq("IAM_ROLE"): 6826 expr.set("iam_role", self._parse_field()) 6827 if self._match_text_seq("REGION"): 6828 expr.set("region", self._parse_field()) 6829 6830 return expr 6831 6832 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6833 return self._parse_field() 6834 6835 def _parse_copy(self) -> exp.Copy | exp.Command: 6836 start = self._prev 6837 6838 self._match(TokenType.INTO) 6839 6840 this = ( 6841 self._parse_select(nested=True, parse_subquery_alias=False) 6842 if self._match(TokenType.L_PAREN, advance=False) 6843 else self._parse_table(schema=True) 6844 ) 6845 6846 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6847 6848 files = self._parse_csv(self._parse_file_location) 6849 credentials = self._parse_credentials() 6850 6851 self._match_text_seq("WITH") 6852 6853 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6854 6855 # Fallback case 6856 if self._curr: 6857 return self._parse_as_command(start) 6858 6859 return self.expression( 6860 exp.Copy, 6861 this=this, 6862 kind=kind, 6863 credentials=credentials, 6864 files=files, 6865 params=params, 6866 )
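    # An illustrative sketch (assuming FILE_FORMAT is registered in
    # COPY_INTO_VARLEN_OPTIONS, as in Snowflake-style dialects):
    #   COPY INTO t FROM 's3://bucket/x.csv' WITH (FILE_FORMAT = (TYPE = CSV))
    # yields an exp.Copy with kind=True (the FROM direction), one parsed file
    # location and the wrapped copy parameters; any trailing unparsed tokens
    # trigger the exp.Command fallback above instead.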
26def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 27 if len(args) == 1 and args[0].is_star: 28 return exp.StarMap(this=args[0]) 29 30 keys = [] 31 values = [] 32 for i in range(0, len(args), 2): 33 keys.append(args[i]) 34 values.append(args[i + 1]) 35 36 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
44def binary_range_parser( 45 expr_type: t.Type[exp.Expression], reverse_args: bool = False 46) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 47 def _parse_binary_range( 48 self: Parser, this: t.Optional[exp.Expression] 49 ) -> t.Optional[exp.Expression]: 50 expression = self._parse_bitwise() 51 if reverse_args: 52 this, expression = expression, this 53 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 54 55 return _parse_binary_range
58def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 59 # Default argument order is base, expression 60 this = seq_get(args, 0) 61 expression = seq_get(args, 1) 62 63 if expression: 64 if not dialect.LOG_BASE_FIRST: 65 this, expression = expression, this 66 return exp.Log(this=this, expression=expression) 67 68 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
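A sketch of the path normalization, assuming the MySQL dialect converts the string path into a structured exp.JSONPath via to_json_path:

import sqlglot
from sqlglot import exp

node = sqlglot.parse_one("SELECT JSON_EXTRACT(doc, '$.a.b')", read="mysql").expressions[0]
assert isinstance(node, exp.JSONExtract)
# The raw '$.a.b' literal is replaced by a parsed JSON path expression.
assert isinstance(node.expression, exp.JSONPath)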
def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
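A sketch of the effect of that wrapping when the node is rendered back to SQL with the % operator (output shown for the default dialect; exact spacing may vary by version):

import sqlglot

# Without the exp.Paren wrapper, a + 1 % 7 would bind % tighter than +.
print(sqlglot.parse_one("SELECT MOD(a + 1, 7)").sql())  # SELECT (a + 1) % 7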
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "MOD": build_mod,
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }
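    # Illustrative note (not part of the original source): statements are dispatched
    # through STATEMENT_PARSERS above by their leading token, so "UPDATE t SET x = 1"
    # is routed to _parse_update via TokenType.UPDATE, and tokens with no entry (and
    # no tokenizer COMMANDS match) fall through to expression parsing in
    # _parse_statement. Dialect parsers typically extend these tables by re-declaring
    # them with extra entries, along the lines of:
    #
    #     STATEMENT_PARSERS = {
    #         **Parser.STATEMENT_PARSERS,
    #         TokenType.SHOW: lambda self: self._parse_show(),  # hypothetical hook
    #     }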
    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var(any_token=True)
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())
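    # Illustrative note (not part of the original source): OPTIONS_TYPE tables such as
    # USABLES above map a leading keyword to the token sequences allowed to follow it
    # and are consumed via _parse_var_from_options. For example, with USABLES, the
    # TokenType.USE entry in STATEMENT_PARSERS parses "USE WAREHOUSE wh" into exp.Use
    # with kind "WAREHOUSE" and this set to the table name wh.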
"SCHEMA"), tuple()) 1129 1130 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1131 1132 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1133 1134 CLONE_KEYWORDS = {"CLONE", "COPY"} 1135 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1136 1137 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1138 1139 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1140 1141 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1142 1143 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1144 1145 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1146 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1147 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1148 1149 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1150 1151 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1152 1153 ADD_CONSTRAINT_TOKENS = { 1154 TokenType.CONSTRAINT, 1155 TokenType.FOREIGN_KEY, 1156 TokenType.INDEX, 1157 TokenType.KEY, 1158 TokenType.PRIMARY_KEY, 1159 TokenType.UNIQUE, 1160 } 1161 1162 DISTINCT_TOKENS = {TokenType.DISTINCT} 1163 1164 NULL_TOKENS = {TokenType.NULL} 1165 1166 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1167 1168 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1169 1170 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1171 1172 STRICT_CAST = True 1173 1174 PREFIXED_PIVOT_COLUMNS = False 1175 IDENTIFY_PIVOT_STRINGS = False 1176 1177 LOG_DEFAULTS_TO_LN = False 1178 1179 # Whether ADD is present for each column added by ALTER TABLE 1180 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1181 1182 # Whether the table sample clause expects CSV syntax 1183 TABLESAMPLE_CSV = False 1184 1185 # The default method used for table sampling 1186 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1187 1188 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1189 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1190 1191 # Whether the TRIM function expects the characters to trim as its first argument 1192 TRIM_PATTERN_FIRST = False 1193 1194 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1195 STRING_ALIASES = False 1196 1197 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1198 MODIFIERS_ATTACHED_TO_SET_OP = True 1199 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1200 1201 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1202 NO_PAREN_IF_COMMANDS = True 1203 1204 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1205 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1206 1207 # Whether the `:` operator is used to extract a value from a JSON document 1208 COLON_IS_JSON_EXTRACT = False 1209 1210 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1211 # If this is True and '(' is not found, the keyword will be treated as an identifier 1212 VALUES_FOLLOWED_BY_PAREN = True 1213 1214 # Whether implicit unnesting is supported, e.g. 
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
1303 """ 1304 errors = [] 1305 for expression_type in ensure_list(expression_types): 1306 parser = self.EXPRESSION_PARSERS.get(expression_type) 1307 if not parser: 1308 raise TypeError(f"No parser registered for {expression_type}") 1309 1310 try: 1311 return self._parse(parser, raw_tokens, sql) 1312 except ParseError as e: 1313 e.errors[0]["into_expression"] = expression_type 1314 errors.append(e) 1315 1316 raise ParseError( 1317 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1318 errors=merge_errors(errors), 1319 ) from errors[-1] 1320 1321 def _parse( 1322 self, 1323 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1324 raw_tokens: t.List[Token], 1325 sql: t.Optional[str] = None, 1326 ) -> t.List[t.Optional[exp.Expression]]: 1327 self.reset() 1328 self.sql = sql or "" 1329 1330 total = len(raw_tokens) 1331 chunks: t.List[t.List[Token]] = [[]] 1332 1333 for i, token in enumerate(raw_tokens): 1334 if token.token_type == TokenType.SEMICOLON: 1335 if token.comments: 1336 chunks.append([token]) 1337 1338 if i < total - 1: 1339 chunks.append([]) 1340 else: 1341 chunks[-1].append(token) 1342 1343 expressions = [] 1344 1345 for tokens in chunks: 1346 self._index = -1 1347 self._tokens = tokens 1348 self._advance() 1349 1350 expressions.append(parse_method(self)) 1351 1352 if self._index < len(self._tokens): 1353 self.raise_error("Invalid expression / Unexpected token") 1354 1355 self.check_errors() 1356 1357 return expressions 1358 1359 def check_errors(self) -> None: 1360 """Logs or raises any found errors, depending on the chosen error level setting.""" 1361 if self.error_level == ErrorLevel.WARN: 1362 for error in self.errors: 1363 logger.error(str(error)) 1364 elif self.error_level == ErrorLevel.RAISE and self.errors: 1365 raise ParseError( 1366 concat_messages(self.errors, self.max_errors), 1367 errors=merge_errors(self.errors), 1368 ) 1369 1370 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1371 """ 1372 Appends an error in the list of recorded errors or raises it, depending on the chosen 1373 error level setting. 1374 """ 1375 token = token or self._curr or self._prev or Token.string("") 1376 start = token.start 1377 end = token.end + 1 1378 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1379 highlight = self.sql[start:end] 1380 end_context = self.sql[end : end + self.error_message_context] 1381 1382 error = ParseError.new( 1383 f"{message}. Line {token.line}, Col: {token.col}.\n" 1384 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1385 description=message, 1386 line=token.line, 1387 col=token.col, 1388 start_context=start_context, 1389 highlight=highlight, 1390 end_context=end_context, 1391 ) 1392 1393 if self.error_level == ErrorLevel.IMMEDIATE: 1394 raise error 1395 1396 self.errors.append(error) 1397 1398 def expression( 1399 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1400 ) -> E: 1401 """ 1402 Creates a new, validated Expression. 1403 1404 Args: 1405 exp_class: The expression class to instantiate. 1406 comments: An optional list of comments to attach to the expression. 1407 kwargs: The arguments to set for the expression along with their respective values. 1408 1409 Returns: 1410 The target expression. 
1411 """ 1412 instance = exp_class(**kwargs) 1413 instance.add_comments(comments) if comments else self._add_comments(instance) 1414 return self.validate_expression(instance) 1415 1416 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1417 if expression and self._prev_comments: 1418 expression.add_comments(self._prev_comments) 1419 self._prev_comments = None 1420 1421 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1422 """ 1423 Validates an Expression, making sure that all its mandatory arguments are set. 1424 1425 Args: 1426 expression: The expression to validate. 1427 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1428 1429 Returns: 1430 The validated expression. 1431 """ 1432 if self.error_level != ErrorLevel.IGNORE: 1433 for error_message in expression.error_messages(args): 1434 self.raise_error(error_message) 1435 1436 return expression 1437 1438 def _find_sql(self, start: Token, end: Token) -> str: 1439 return self.sql[start.start : end.end + 1] 1440 1441 def _is_connected(self) -> bool: 1442 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1443 1444 def _advance(self, times: int = 1) -> None: 1445 self._index += times 1446 self._curr = seq_get(self._tokens, self._index) 1447 self._next = seq_get(self._tokens, self._index + 1) 1448 1449 if self._index > 0: 1450 self._prev = self._tokens[self._index - 1] 1451 self._prev_comments = self._prev.comments 1452 else: 1453 self._prev = None 1454 self._prev_comments = None 1455 1456 def _retreat(self, index: int) -> None: 1457 if index != self._index: 1458 self._advance(index - self._index) 1459 1460 def _warn_unsupported(self) -> None: 1461 if len(self._tokens) <= 1: 1462 return 1463 1464 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1465 # interested in emitting a warning for the one being currently processed. 1466 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1467 1468 logger.warning( 1469 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1470 ) 1471 1472 def _parse_command(self) -> exp.Command: 1473 self._warn_unsupported() 1474 return self.expression( 1475 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1476 ) 1477 1478 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1479 """ 1480 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
        be different depending on the user-set ErrorLevel, so _try_parse aims to solve this by setting & resetting
        the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
self._match_text_seq("START"): 1802 self._match_text_seq("=") 1803 seq.set("start", self._parse_term()) 1804 elif self._match_text_seq("CACHE"): 1805 # T-SQL allows empty CACHE which is initialized dynamically 1806 seq.set("cache", self._parse_number() or True) 1807 elif self._match_text_seq("OWNED", "BY"): 1808 # "OWNED BY NONE" is the default 1809 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1810 else: 1811 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1812 if opt: 1813 options.append(opt) 1814 else: 1815 break 1816 1817 seq.set("options", options if options else None) 1818 return None if self._index == index else seq 1819 1820 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1821 # only used for teradata currently 1822 self._match(TokenType.COMMA) 1823 1824 kwargs = { 1825 "no": self._match_text_seq("NO"), 1826 "dual": self._match_text_seq("DUAL"), 1827 "before": self._match_text_seq("BEFORE"), 1828 "default": self._match_text_seq("DEFAULT"), 1829 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1830 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1831 "after": self._match_text_seq("AFTER"), 1832 "minimum": self._match_texts(("MIN", "MINIMUM")), 1833 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1834 } 1835 1836 if self._match_texts(self.PROPERTY_PARSERS): 1837 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1838 try: 1839 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1840 except TypeError: 1841 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1842 1843 return None 1844 1845 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1846 return self._parse_wrapped_csv(self._parse_property) 1847 1848 def _parse_property(self) -> t.Optional[exp.Expression]: 1849 if self._match_texts(self.PROPERTY_PARSERS): 1850 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1851 1852 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1853 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1854 1855 if self._match_text_seq("COMPOUND", "SORTKEY"): 1856 return self._parse_sortkey(compound=True) 1857 1858 if self._match_text_seq("SQL", "SECURITY"): 1859 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1860 1861 index = self._index 1862 key = self._parse_column() 1863 1864 if not self._match(TokenType.EQ): 1865 self._retreat(index) 1866 return self._parse_sequence_properties() 1867 1868 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 1869 if isinstance(key, exp.Column): 1870 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 1871 1872 value = self._parse_bitwise() or self._parse_var(any_token=True) 1873 1874 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 1875 if isinstance(value, exp.Column): 1876 value = exp.var(value.name) 1877 1878 return self.expression(exp.Property, this=key, value=value) 1879 1880 def _parse_stored(self) -> exp.FileFormatProperty: 1881 self._match(TokenType.ALIAS) 1882 1883 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1884 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1885 1886 return self.expression( 1887 exp.FileFormatProperty, 1888 this=( 1889 self.expression( 1890 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1891 ) 1892 if 
input_format or output_format 1893 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1894 ), 1895 ) 1896 1897 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 1898 field = self._parse_field() 1899 if isinstance(field, exp.Identifier) and not field.quoted: 1900 field = exp.var(field) 1901 1902 return field 1903 1904 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1905 self._match(TokenType.EQ) 1906 self._match(TokenType.ALIAS) 1907 1908 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1909 1910 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1911 properties = [] 1912 while True: 1913 if before: 1914 prop = self._parse_property_before() 1915 else: 1916 prop = self._parse_property() 1917 if not prop: 1918 break 1919 for p in ensure_list(prop): 1920 properties.append(p) 1921 1922 if properties: 1923 return self.expression(exp.Properties, expressions=properties) 1924 1925 return None 1926 1927 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1928 return self.expression( 1929 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1930 ) 1931 1932 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1933 if self._index >= 2: 1934 pre_volatile_token = self._tokens[self._index - 2] 1935 else: 1936 pre_volatile_token = None 1937 1938 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1939 return exp.VolatileProperty() 1940 1941 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1942 1943 def _parse_retention_period(self) -> exp.Var: 1944 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 1945 number = self._parse_number() 1946 number_str = f"{number} " if number else "" 1947 unit = self._parse_var(any_token=True) 1948 return exp.var(f"{number_str}{unit}") 1949 1950 def _parse_system_versioning_property( 1951 self, with_: bool = False 1952 ) -> exp.WithSystemVersioningProperty: 1953 self._match(TokenType.EQ) 1954 prop = self.expression( 1955 exp.WithSystemVersioningProperty, 1956 **{ # type: ignore 1957 "on": True, 1958 "with": with_, 1959 }, 1960 ) 1961 1962 if self._match_text_seq("OFF"): 1963 prop.set("on", False) 1964 return prop 1965 1966 self._match(TokenType.ON) 1967 if self._match(TokenType.L_PAREN): 1968 while self._curr and not self._match(TokenType.R_PAREN): 1969 if self._match_text_seq("HISTORY_TABLE", "="): 1970 prop.set("this", self._parse_table_parts()) 1971 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 1972 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 1973 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 1974 prop.set("retention_period", self._parse_retention_period()) 1975 1976 self._match(TokenType.COMMA) 1977 1978 return prop 1979 1980 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 1981 self._match(TokenType.EQ) 1982 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 1983 prop = self.expression(exp.DataDeletionProperty, on=on) 1984 1985 if self._match(TokenType.L_PAREN): 1986 while self._curr and not self._match(TokenType.R_PAREN): 1987 if self._match_text_seq("FILTER_COLUMN", "="): 1988 prop.set("filter_column", self._parse_column()) 1989 elif self._match_text_seq("RETENTION_PERIOD", "="): 1990 prop.set("retention_period", self._parse_retention_period()) 1991 1992 
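# NOTE (illustrative aside, not part of the original source): the
# SYSTEM_VERSIONING / retention-period parsers around this point model
# T-SQL temporal-table options. A minimal sketch of how they surface through
# the public API, assuming the T-SQL dialect routes
# WITH (SYSTEM_VERSIONING = ...) through _parse_system_versioning_property;
# the DDL string is an assumption for illustration:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> ddl = "CREATE TABLE t (a INT) WITH (SYSTEM_VERSIONING = ON (HISTORY_TABLE = dbo.history))"
#     >>> sqlglot.parse_one(ddl, read="tsql").find(exp.WithSystemVersioningProperty) is not None
#     True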
self._match(TokenType.COMMA) 1993 1994 return prop 1995 1996 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1997 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 1998 prop = self._parse_system_versioning_property(with_=True) 1999 self._match_r_paren() 2000 return prop 2001 2002 if self._match(TokenType.L_PAREN, advance=False): 2003 return self._parse_wrapped_properties() 2004 2005 if self._match_text_seq("JOURNAL"): 2006 return self._parse_withjournaltable() 2007 2008 if self._match_texts(self.VIEW_ATTRIBUTES): 2009 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2010 2011 if self._match_text_seq("DATA"): 2012 return self._parse_withdata(no=False) 2013 elif self._match_text_seq("NO", "DATA"): 2014 return self._parse_withdata(no=True) 2015 2016 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2017 return self._parse_serde_properties(with_=True) 2018 2019 if not self._next: 2020 return None 2021 2022 return self._parse_withisolatedloading() 2023 2024 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2025 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2026 self._match(TokenType.EQ) 2027 2028 user = self._parse_id_var() 2029 self._match(TokenType.PARAMETER) 2030 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2031 2032 if not user or not host: 2033 return None 2034 2035 return exp.DefinerProperty(this=f"{user}@{host}") 2036 2037 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2038 self._match(TokenType.TABLE) 2039 self._match(TokenType.EQ) 2040 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2041 2042 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2043 return self.expression(exp.LogProperty, no=no) 2044 2045 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2046 return self.expression(exp.JournalProperty, **kwargs) 2047 2048 def _parse_checksum(self) -> exp.ChecksumProperty: 2049 self._match(TokenType.EQ) 2050 2051 on = None 2052 if self._match(TokenType.ON): 2053 on = True 2054 elif self._match_text_seq("OFF"): 2055 on = False 2056 2057 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2058 2059 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2060 return self.expression( 2061 exp.Cluster, 2062 expressions=( 2063 self._parse_wrapped_csv(self._parse_ordered) 2064 if wrapped 2065 else self._parse_csv(self._parse_ordered) 2066 ), 2067 ) 2068 2069 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2070 self._match_text_seq("BY") 2071 2072 self._match_l_paren() 2073 expressions = self._parse_csv(self._parse_column) 2074 self._match_r_paren() 2075 2076 if self._match_text_seq("SORTED", "BY"): 2077 self._match_l_paren() 2078 sorted_by = self._parse_csv(self._parse_ordered) 2079 self._match_r_paren() 2080 else: 2081 sorted_by = None 2082 2083 self._match(TokenType.INTO) 2084 buckets = self._parse_number() 2085 self._match_text_seq("BUCKETS") 2086 2087 return self.expression( 2088 exp.ClusteredByProperty, 2089 expressions=expressions, 2090 sorted_by=sorted_by, 2091 buckets=buckets, 2092 ) 2093 2094 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2095 if not self._match_text_seq("GRANTS"): 2096 self._retreat(self._index - 1) 2097 return None 2098 2099 return self.expression(exp.CopyGrantsProperty) 2100 2101 def _parse_freespace(self) -> exp.FreespaceProperty: 2102 self._match(TokenType.EQ) 2103 return 
self.expression( 2104 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2105 ) 2106 2107 def _parse_mergeblockratio( 2108 self, no: bool = False, default: bool = False 2109 ) -> exp.MergeBlockRatioProperty: 2110 if self._match(TokenType.EQ): 2111 return self.expression( 2112 exp.MergeBlockRatioProperty, 2113 this=self._parse_number(), 2114 percent=self._match(TokenType.PERCENT), 2115 ) 2116 2117 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2118 2119 def _parse_datablocksize( 2120 self, 2121 default: t.Optional[bool] = None, 2122 minimum: t.Optional[bool] = None, 2123 maximum: t.Optional[bool] = None, 2124 ) -> exp.DataBlocksizeProperty: 2125 self._match(TokenType.EQ) 2126 size = self._parse_number() 2127 2128 units = None 2129 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2130 units = self._prev.text 2131 2132 return self.expression( 2133 exp.DataBlocksizeProperty, 2134 size=size, 2135 units=units, 2136 default=default, 2137 minimum=minimum, 2138 maximum=maximum, 2139 ) 2140 2141 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2142 self._match(TokenType.EQ) 2143 always = self._match_text_seq("ALWAYS") 2144 manual = self._match_text_seq("MANUAL") 2145 never = self._match_text_seq("NEVER") 2146 default = self._match_text_seq("DEFAULT") 2147 2148 autotemp = None 2149 if self._match_text_seq("AUTOTEMP"): 2150 autotemp = self._parse_schema() 2151 2152 return self.expression( 2153 exp.BlockCompressionProperty, 2154 always=always, 2155 manual=manual, 2156 never=never, 2157 default=default, 2158 autotemp=autotemp, 2159 ) 2160 2161 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2162 index = self._index 2163 no = self._match_text_seq("NO") 2164 concurrent = self._match_text_seq("CONCURRENT") 2165 2166 if not self._match_text_seq("ISOLATED", "LOADING"): 2167 self._retreat(index) 2168 return None 2169 2170 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2171 return self.expression( 2172 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2173 ) 2174 2175 def _parse_locking(self) -> exp.LockingProperty: 2176 if self._match(TokenType.TABLE): 2177 kind = "TABLE" 2178 elif self._match(TokenType.VIEW): 2179 kind = "VIEW" 2180 elif self._match(TokenType.ROW): 2181 kind = "ROW" 2182 elif self._match_text_seq("DATABASE"): 2183 kind = "DATABASE" 2184 else: 2185 kind = None 2186 2187 if kind in ("DATABASE", "TABLE", "VIEW"): 2188 this = self._parse_table_parts() 2189 else: 2190 this = None 2191 2192 if self._match(TokenType.FOR): 2193 for_or_in = "FOR" 2194 elif self._match(TokenType.IN): 2195 for_or_in = "IN" 2196 else: 2197 for_or_in = None 2198 2199 if self._match_text_seq("ACCESS"): 2200 lock_type = "ACCESS" 2201 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2202 lock_type = "EXCLUSIVE" 2203 elif self._match_text_seq("SHARE"): 2204 lock_type = "SHARE" 2205 elif self._match_text_seq("READ"): 2206 lock_type = "READ" 2207 elif self._match_text_seq("WRITE"): 2208 lock_type = "WRITE" 2209 elif self._match_text_seq("CHECKSUM"): 2210 lock_type = "CHECKSUM" 2211 else: 2212 lock_type = None 2213 2214 override = self._match_text_seq("OVERRIDE") 2215 2216 return self.expression( 2217 exp.LockingProperty, 2218 this=this, 2219 kind=kind, 2220 for_or_in=for_or_in, 2221 lock_type=lock_type, 2222 override=override, 2223 ) 2224 2225 def _parse_partition_by(self) -> t.List[exp.Expression]: 2226 if 
self._match(TokenType.PARTITION_BY): 2227 return self._parse_csv(self._parse_assignment) 2228 return [] 2229 2230 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2231 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2232 if self._match_text_seq("MINVALUE"): 2233 return exp.var("MINVALUE") 2234 if self._match_text_seq("MAXVALUE"): 2235 return exp.var("MAXVALUE") 2236 return self._parse_bitwise() 2237 2238 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2239 expression = None 2240 from_expressions = None 2241 to_expressions = None 2242 2243 if self._match(TokenType.IN): 2244 this = self._parse_wrapped_csv(self._parse_bitwise) 2245 elif self._match(TokenType.FROM): 2246 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2247 self._match_text_seq("TO") 2248 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2249 elif self._match_text_seq("WITH", "(", "MODULUS"): 2250 this = self._parse_number() 2251 self._match_text_seq(",", "REMAINDER") 2252 expression = self._parse_number() 2253 self._match_r_paren() 2254 else: 2255 self.raise_error("Failed to parse partition bound spec.") 2256 2257 return self.expression( 2258 exp.PartitionBoundSpec, 2259 this=this, 2260 expression=expression, 2261 from_expressions=from_expressions, 2262 to_expressions=to_expressions, 2263 ) 2264 2265 # https://www.postgresql.org/docs/current/sql-createtable.html 2266 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2267 if not self._match_text_seq("OF"): 2268 self._retreat(self._index - 1) 2269 return None 2270 2271 this = self._parse_table(schema=True) 2272 2273 if self._match(TokenType.DEFAULT): 2274 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2275 elif self._match_text_seq("FOR", "VALUES"): 2276 expression = self._parse_partition_bound_spec() 2277 else: 2278 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2279 2280 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2281 2282 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2283 self._match(TokenType.EQ) 2284 return self.expression( 2285 exp.PartitionedByProperty, 2286 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2287 ) 2288 2289 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2290 if self._match_text_seq("AND", "STATISTICS"): 2291 statistics = True 2292 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2293 statistics = False 2294 else: 2295 statistics = None 2296 2297 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2298 2299 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2300 if self._match_text_seq("SQL"): 2301 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2302 return None 2303 2304 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2305 if self._match_text_seq("SQL", "DATA"): 2306 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2307 return None 2308 2309 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2310 if self._match_text_seq("PRIMARY", "INDEX"): 2311 return exp.NoPrimaryIndexProperty() 2312 if self._match_text_seq("SQL"): 2313 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2314 return None 2315 2316 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2317 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2318 return exp.OnCommitProperty() 2319 
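# NOTE (illustrative aside, not part of the original source): this method
# turns ON COMMIT PRESERVE ROWS / ON COMMIT DELETE ROWS into
# exp.OnCommitProperty and falls back to a generic exp.OnProperty otherwise.
# A minimal sketch, assuming the Postgres dialect reaches this parser for
# temporary-table DDL; the SQL text is an assumption for illustration:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> ddl = "CREATE TEMPORARY TABLE t (a INT) ON COMMIT PRESERVE ROWS"
#     >>> sqlglot.parse_one(ddl, read="postgres").find(exp.OnCommitProperty) is not None
#     True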
if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2320 return exp.OnCommitProperty(delete=True) 2321 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2322 2323 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2324 if self._match_text_seq("SQL", "DATA"): 2325 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2326 return None 2327 2328 def _parse_distkey(self) -> exp.DistKeyProperty: 2329 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2330 2331 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2332 table = self._parse_table(schema=True) 2333 2334 options = [] 2335 while self._match_texts(("INCLUDING", "EXCLUDING")): 2336 this = self._prev.text.upper() 2337 2338 id_var = self._parse_id_var() 2339 if not id_var: 2340 return None 2341 2342 options.append( 2343 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2344 ) 2345 2346 return self.expression(exp.LikeProperty, this=table, expressions=options) 2347 2348 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2349 return self.expression( 2350 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2351 ) 2352 2353 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2354 self._match(TokenType.EQ) 2355 return self.expression( 2356 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2357 ) 2358 2359 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2360 self._match_text_seq("WITH", "CONNECTION") 2361 return self.expression( 2362 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2363 ) 2364 2365 def _parse_returns(self) -> exp.ReturnsProperty: 2366 value: t.Optional[exp.Expression] 2367 null = None 2368 is_table = self._match(TokenType.TABLE) 2369 2370 if is_table: 2371 if self._match(TokenType.LT): 2372 value = self.expression( 2373 exp.Schema, 2374 this="TABLE", 2375 expressions=self._parse_csv(self._parse_struct_types), 2376 ) 2377 if not self._match(TokenType.GT): 2378 self.raise_error("Expecting >") 2379 else: 2380 value = self._parse_schema(exp.var("TABLE")) 2381 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2382 null = True 2383 value = None 2384 else: 2385 value = self._parse_types() 2386 2387 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2388 2389 def _parse_describe(self) -> exp.Describe: 2390 kind = self._match_set(self.CREATABLES) and self._prev.text 2391 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2392 if self._match(TokenType.DOT): 2393 style = None 2394 self._retreat(self._index - 2) 2395 this = self._parse_table(schema=True) 2396 properties = self._parse_properties() 2397 expressions = properties.expressions if properties else None 2398 return self.expression( 2399 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2400 ) 2401 2402 def _parse_insert(self) -> exp.Insert: 2403 comments = ensure_list(self._prev_comments) 2404 hint = self._parse_hint() 2405 overwrite = self._match(TokenType.OVERWRITE) 2406 ignore = self._match(TokenType.IGNORE) 2407 local = self._match_text_seq("LOCAL") 2408 alternative = None 2409 is_function = None 2410 2411 if self._match_text_seq("DIRECTORY"): 2412 this: t.Optional[exp.Expression] = self.expression( 2413 exp.Directory, 2414 this=self._parse_var_or_string(), 2415 
local=local, 2416 row_format=self._parse_row_format(match_row=True), 2417 ) 2418 else: 2419 if self._match(TokenType.OR): 2420 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2421 2422 self._match(TokenType.INTO) 2423 comments += ensure_list(self._prev_comments) 2424 self._match(TokenType.TABLE) 2425 is_function = self._match(TokenType.FUNCTION) 2426 2427 this = ( 2428 self._parse_table(schema=True, parse_partition=True) 2429 if not is_function 2430 else self._parse_function() 2431 ) 2432 2433 returning = self._parse_returning() 2434 2435 return self.expression( 2436 exp.Insert, 2437 comments=comments, 2438 hint=hint, 2439 is_function=is_function, 2440 this=this, 2441 stored=self._match_text_seq("STORED") and self._parse_stored(), 2442 by_name=self._match_text_seq("BY", "NAME"), 2443 exists=self._parse_exists(), 2444 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2445 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2446 conflict=self._parse_on_conflict(), 2447 returning=returning or self._parse_returning(), 2448 overwrite=overwrite, 2449 alternative=alternative, 2450 ignore=ignore, 2451 ) 2452 2453 def _parse_kill(self) -> exp.Kill: 2454 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2455 2456 return self.expression( 2457 exp.Kill, 2458 this=self._parse_primary(), 2459 kind=kind, 2460 ) 2461 2462 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2463 conflict = self._match_text_seq("ON", "CONFLICT") 2464 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2465 2466 if not conflict and not duplicate: 2467 return None 2468 2469 conflict_keys = None 2470 constraint = None 2471 2472 if conflict: 2473 if self._match_text_seq("ON", "CONSTRAINT"): 2474 constraint = self._parse_id_var() 2475 elif self._match(TokenType.L_PAREN): 2476 conflict_keys = self._parse_csv(self._parse_id_var) 2477 self._match_r_paren() 2478 2479 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2480 if self._prev.token_type == TokenType.UPDATE: 2481 self._match(TokenType.SET) 2482 expressions = self._parse_csv(self._parse_equality) 2483 else: 2484 expressions = None 2485 2486 return self.expression( 2487 exp.OnConflict, 2488 duplicate=duplicate, 2489 expressions=expressions, 2490 action=action, 2491 conflict_keys=conflict_keys, 2492 constraint=constraint, 2493 ) 2494 2495 def _parse_returning(self) -> t.Optional[exp.Returning]: 2496 if not self._match(TokenType.RETURNING): 2497 return None 2498 return self.expression( 2499 exp.Returning, 2500 expressions=self._parse_csv(self._parse_expression), 2501 into=self._match(TokenType.INTO) and self._parse_table_part(), 2502 ) 2503 2504 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2505 if not self._match(TokenType.FORMAT): 2506 return None 2507 return self._parse_row_format() 2508 2509 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2510 index = self._index 2511 with_ = with_ or self._match_text_seq("WITH") 2512 2513 if not self._match(TokenType.SERDE_PROPERTIES): 2514 self._retreat(index) 2515 return None 2516 return self.expression( 2517 exp.SerdeProperties, 2518 **{ # type: ignore 2519 "expressions": self._parse_wrapped_properties(), 2520 "with": with_, 2521 }, 2522 ) 2523 2524 def _parse_row_format( 2525 self, match_row: bool = False 2526 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2527 
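# NOTE (illustrative aside, not part of the original source): the SERDE and
# DELIMITED branches below cover Hive ROW FORMAT clauses. A minimal sketch,
# assuming the Hive dialect; the DDL string is an assumption for
# illustration:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> ddl = "CREATE TABLE t (a STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ','"
#     >>> sqlglot.parse_one(ddl, read="hive").find(exp.RowFormatDelimitedProperty) is not None
#     True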
if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2528 return None 2529 2530 if self._match_text_seq("SERDE"): 2531 this = self._parse_string() 2532 2533 serde_properties = self._parse_serde_properties() 2534 2535 return self.expression( 2536 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2537 ) 2538 2539 self._match_text_seq("DELIMITED") 2540 2541 kwargs = {} 2542 2543 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2544 kwargs["fields"] = self._parse_string() 2545 if self._match_text_seq("ESCAPED", "BY"): 2546 kwargs["escaped"] = self._parse_string() 2547 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2548 kwargs["collection_items"] = self._parse_string() 2549 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2550 kwargs["map_keys"] = self._parse_string() 2551 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2552 kwargs["lines"] = self._parse_string() 2553 if self._match_text_seq("NULL", "DEFINED", "AS"): 2554 kwargs["null"] = self._parse_string() 2555 2556 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2557 2558 def _parse_load(self) -> exp.LoadData | exp.Command: 2559 if self._match_text_seq("DATA"): 2560 local = self._match_text_seq("LOCAL") 2561 self._match_text_seq("INPATH") 2562 inpath = self._parse_string() 2563 overwrite = self._match(TokenType.OVERWRITE) 2564 self._match_pair(TokenType.INTO, TokenType.TABLE) 2565 2566 return self.expression( 2567 exp.LoadData, 2568 this=self._parse_table(schema=True), 2569 local=local, 2570 overwrite=overwrite, 2571 inpath=inpath, 2572 partition=self._parse_partition(), 2573 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2574 serde=self._match_text_seq("SERDE") and self._parse_string(), 2575 ) 2576 return self._parse_as_command(self._prev) 2577 2578 def _parse_delete(self) -> exp.Delete: 2579 # This handles MySQL's "Multiple-Table Syntax" 2580 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2581 tables = None 2582 comments = self._prev_comments 2583 if not self._match(TokenType.FROM, advance=False): 2584 tables = self._parse_csv(self._parse_table) or None 2585 2586 returning = self._parse_returning() 2587 2588 return self.expression( 2589 exp.Delete, 2590 comments=comments, 2591 tables=tables, 2592 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2593 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2594 where=self._parse_where(), 2595 returning=returning or self._parse_returning(), 2596 limit=self._parse_limit(), 2597 ) 2598 2599 def _parse_update(self) -> exp.Update: 2600 comments = self._prev_comments 2601 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2602 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2603 returning = self._parse_returning() 2604 return self.expression( 2605 exp.Update, 2606 comments=comments, 2607 **{ # type: ignore 2608 "this": this, 2609 "expressions": expressions, 2610 "from": self._parse_from(joins=True), 2611 "where": self._parse_where(), 2612 "returning": returning or self._parse_returning(), 2613 "order": self._parse_order(), 2614 "limit": self._parse_limit(), 2615 }, 2616 ) 2617 2618 def _parse_uncache(self) -> exp.Uncache: 2619 if not self._match(TokenType.TABLE): 2620 self.raise_error("Expecting TABLE after UNCACHE") 2621 2622 return self.expression( 2623 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2624 ) 2625 2626 def 
_parse_cache(self) -> exp.Cache: 2627 lazy = self._match_text_seq("LAZY") 2628 self._match(TokenType.TABLE) 2629 table = self._parse_table(schema=True) 2630 2631 options = [] 2632 if self._match_text_seq("OPTIONS"): 2633 self._match_l_paren() 2634 k = self._parse_string() 2635 self._match(TokenType.EQ) 2636 v = self._parse_string() 2637 options = [k, v] 2638 self._match_r_paren() 2639 2640 self._match(TokenType.ALIAS) 2641 return self.expression( 2642 exp.Cache, 2643 this=table, 2644 lazy=lazy, 2645 options=options, 2646 expression=self._parse_select(nested=True), 2647 ) 2648 2649 def _parse_partition(self) -> t.Optional[exp.Partition]: 2650 if not self._match(TokenType.PARTITION): 2651 return None 2652 2653 return self.expression( 2654 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2655 ) 2656 2657 def _parse_value(self) -> t.Optional[exp.Tuple]: 2658 if self._match(TokenType.L_PAREN): 2659 expressions = self._parse_csv(self._parse_expression) 2660 self._match_r_paren() 2661 return self.expression(exp.Tuple, expressions=expressions) 2662 2663 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2664 expression = self._parse_expression() 2665 if expression: 2666 return self.expression(exp.Tuple, expressions=[expression]) 2667 return None 2668 2669 def _parse_projections(self) -> t.List[exp.Expression]: 2670 return self._parse_expressions() 2671 2672 def _parse_select( 2673 self, 2674 nested: bool = False, 2675 table: bool = False, 2676 parse_subquery_alias: bool = True, 2677 parse_set_operation: bool = True, 2678 ) -> t.Optional[exp.Expression]: 2679 cte = self._parse_with() 2680 2681 if cte: 2682 this = self._parse_statement() 2683 2684 if not this: 2685 self.raise_error("Failed to parse any statement following CTE") 2686 return cte 2687 2688 if "with" in this.arg_types: 2689 this.set("with", cte) 2690 else: 2691 self.raise_error(f"{this.key} does not support CTE") 2692 this = cte 2693 2694 return this 2695 2696 # duckdb supports leading with FROM x 2697 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2698 2699 if self._match(TokenType.SELECT): 2700 comments = self._prev_comments 2701 2702 hint = self._parse_hint() 2703 all_ = self._match(TokenType.ALL) 2704 distinct = self._match_set(self.DISTINCT_TOKENS) 2705 2706 kind = ( 2707 self._match(TokenType.ALIAS) 2708 and self._match_texts(("STRUCT", "VALUE")) 2709 and self._prev.text.upper() 2710 ) 2711 2712 if distinct: 2713 distinct = self.expression( 2714 exp.Distinct, 2715 on=self._parse_value() if self._match(TokenType.ON) else None, 2716 ) 2717 2718 if all_ and distinct: 2719 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2720 2721 limit = self._parse_limit(top=True) 2722 projections = self._parse_projections() 2723 2724 this = self.expression( 2725 exp.Select, 2726 kind=kind, 2727 hint=hint, 2728 distinct=distinct, 2729 expressions=projections, 2730 limit=limit, 2731 ) 2732 this.comments = comments 2733 2734 into = self._parse_into() 2735 if into: 2736 this.set("into", into) 2737 2738 if not from_: 2739 from_ = self._parse_from() 2740 2741 if from_: 2742 this.set("from", from_) 2743 2744 this = self._parse_query_modifiers(this) 2745 elif (table or nested) and self._match(TokenType.L_PAREN): 2746 if self._match(TokenType.PIVOT): 2747 this = self._parse_simplified_pivot() 2748 elif self._match(TokenType.FROM): 2749 this = exp.select("*").from_( 2750 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2751 ) 2752 else: 2753 
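# NOTE (illustrative aside, not part of the original source): the
# leading-FROM handling earlier in this method is what lets DuckDB-style
# queries omit SELECT; a bare FROM is expanded to SELECT *. A minimal sketch
# (the dialect choice is an assumption for illustration):
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("FROM t", read="duckdb").sql()
#     'SELECT * FROM t'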
this = ( 2754 self._parse_table() 2755 if table 2756 else self._parse_select(nested=True, parse_set_operation=False) 2757 ) 2758 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2759 2760 self._match_r_paren() 2761 2762 # We return early here so that the UNION isn't attached to the subquery by the 2763 # following call to _parse_set_operations, but instead becomes the parent node 2764 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2765 elif self._match(TokenType.VALUES, advance=False): 2766 this = self._parse_derived_table_values() 2767 elif from_: 2768 this = exp.select("*").from_(from_.this, copy=False) 2769 else: 2770 this = None 2771 2772 if parse_set_operation: 2773 return self._parse_set_operations(this) 2774 return this 2775 2776 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2777 if not skip_with_token and not self._match(TokenType.WITH): 2778 return None 2779 2780 comments = self._prev_comments 2781 recursive = self._match(TokenType.RECURSIVE) 2782 2783 expressions = [] 2784 while True: 2785 expressions.append(self._parse_cte()) 2786 2787 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2788 break 2789 else: 2790 self._match(TokenType.WITH) 2791 2792 return self.expression( 2793 exp.With, comments=comments, expressions=expressions, recursive=recursive 2794 ) 2795 2796 def _parse_cte(self) -> exp.CTE: 2797 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2798 if not alias or not alias.this: 2799 self.raise_error("Expected CTE to have alias") 2800 2801 self._match(TokenType.ALIAS) 2802 2803 if self._match_text_seq("NOT", "MATERIALIZED"): 2804 materialized = False 2805 elif self._match_text_seq("MATERIALIZED"): 2806 materialized = True 2807 else: 2808 materialized = None 2809 2810 return self.expression( 2811 exp.CTE, 2812 this=self._parse_wrapped(self._parse_statement), 2813 alias=alias, 2814 materialized=materialized, 2815 ) 2816 2817 def _parse_table_alias( 2818 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2819 ) -> t.Optional[exp.TableAlias]: 2820 any_token = self._match(TokenType.ALIAS) 2821 alias = ( 2822 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2823 or self._parse_string_as_identifier() 2824 ) 2825 2826 index = self._index 2827 if self._match(TokenType.L_PAREN): 2828 columns = self._parse_csv(self._parse_function_parameter) 2829 self._match_r_paren() if columns else self._retreat(index) 2830 else: 2831 columns = None 2832 2833 if not alias and not columns: 2834 return None 2835 2836 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 2837 2838 # We bubble up comments from the Identifier to the TableAlias 2839 if isinstance(alias, exp.Identifier): 2840 table_alias.add_comments(alias.pop_comments()) 2841 2842 return table_alias 2843 2844 def _parse_subquery( 2845 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2846 ) -> t.Optional[exp.Subquery]: 2847 if not this: 2848 return None 2849 2850 return self.expression( 2851 exp.Subquery, 2852 this=this, 2853 pivots=self._parse_pivots(), 2854 alias=self._parse_table_alias() if parse_alias else None, 2855 ) 2856 2857 def _implicit_unnests_to_explicit(self, this: E) -> E: 2858 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2859 2860 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2861 for i, join in enumerate(this.args.get("joins") or []): 2862 table = join.this 2863 
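# NOTE (illustrative aside, not part of the original source): this loop
# rewrites comma joins against a column path of an earlier table into an
# explicit UNNEST. A minimal sketch, assuming the BigQuery dialect enables
# SUPPORTS_IMPLICIT_UNNEST; the SQL text is an assumption for illustration:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> sql = "SELECT item FROM t, t.items AS item"
#     >>> sqlglot.parse_one(sql, read="bigquery").find(exp.Unnest) is not None
#     True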
normalized_table = table.copy() 2864 normalized_table.meta["maybe_column"] = True 2865 normalized_table = _norm(normalized_table, dialect=self.dialect) 2866 2867 if isinstance(table, exp.Table) and not join.args.get("on"): 2868 if normalized_table.parts[0].name in refs: 2869 table_as_column = table.to_column() 2870 unnest = exp.Unnest(expressions=[table_as_column]) 2871 2872 # Table.to_column creates a parent Alias node that we want to convert to 2873 # a TableAlias and attach to the Unnest, so it matches the parser's output 2874 if isinstance(table.args.get("alias"), exp.TableAlias): 2875 table_as_column.replace(table_as_column.this) 2876 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2877 2878 table.replace(unnest) 2879 2880 refs.add(normalized_table.alias_or_name) 2881 2882 return this 2883 2884 def _parse_query_modifiers( 2885 self, this: t.Optional[exp.Expression] 2886 ) -> t.Optional[exp.Expression]: 2887 if isinstance(this, (exp.Query, exp.Table)): 2888 for join in self._parse_joins(): 2889 this.append("joins", join) 2890 for lateral in iter(self._parse_lateral, None): 2891 this.append("laterals", lateral) 2892 2893 while True: 2894 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2895 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2896 key, expression = parser(self) 2897 2898 if expression: 2899 this.set(key, expression) 2900 if key == "limit": 2901 offset = expression.args.pop("offset", None) 2902 2903 if offset: 2904 offset = exp.Offset(expression=offset) 2905 this.set("offset", offset) 2906 2907 limit_by_expressions = expression.expressions 2908 expression.set("expressions", None) 2909 offset.set("expressions", limit_by_expressions) 2910 continue 2911 break 2912 2913 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 2914 this = self._implicit_unnests_to_explicit(this) 2915 2916 return this 2917 2918 def _parse_hint(self) -> t.Optional[exp.Hint]: 2919 if self._match(TokenType.HINT): 2920 hints = [] 2921 for hint in iter( 2922 lambda: self._parse_csv( 2923 lambda: self._parse_function() or self._parse_var(upper=True) 2924 ), 2925 [], 2926 ): 2927 hints.extend(hint) 2928 2929 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2930 self.raise_error("Expected */ after HINT") 2931 2932 return self.expression(exp.Hint, expressions=hints) 2933 2934 return None 2935 2936 def _parse_into(self) -> t.Optional[exp.Into]: 2937 if not self._match(TokenType.INTO): 2938 return None 2939 2940 temp = self._match(TokenType.TEMPORARY) 2941 unlogged = self._match_text_seq("UNLOGGED") 2942 self._match(TokenType.TABLE) 2943 2944 return self.expression( 2945 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2946 ) 2947 2948 def _parse_from( 2949 self, joins: bool = False, skip_from_token: bool = False 2950 ) -> t.Optional[exp.From]: 2951 if not skip_from_token and not self._match(TokenType.FROM): 2952 return None 2953 2954 return self.expression( 2955 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2956 ) 2957 2958 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2959 return self.expression( 2960 exp.MatchRecognizeMeasure, 2961 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2962 this=self._parse_expression(), 2963 ) 2964 2965 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2966 if not self._match(TokenType.MATCH_RECOGNIZE): 2967 return None 2968 2969 self._match_l_paren() 2970 2971 partition = 
self._parse_partition_by() 2972 order = self._parse_order() 2973 2974 measures = ( 2975 self._parse_csv(self._parse_match_recognize_measure) 2976 if self._match_text_seq("MEASURES") 2977 else None 2978 ) 2979 2980 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2981 rows = exp.var("ONE ROW PER MATCH") 2982 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2983 text = "ALL ROWS PER MATCH" 2984 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2985 text += " SHOW EMPTY MATCHES" 2986 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2987 text += " OMIT EMPTY MATCHES" 2988 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2989 text += " WITH UNMATCHED ROWS" 2990 rows = exp.var(text) 2991 else: 2992 rows = None 2993 2994 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2995 text = "AFTER MATCH SKIP" 2996 if self._match_text_seq("PAST", "LAST", "ROW"): 2997 text += " PAST LAST ROW" 2998 elif self._match_text_seq("TO", "NEXT", "ROW"): 2999 text += " TO NEXT ROW" 3000 elif self._match_text_seq("TO", "FIRST"): 3001 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3002 elif self._match_text_seq("TO", "LAST"): 3003 text += f" TO LAST {self._advance_any().text}" # type: ignore 3004 after = exp.var(text) 3005 else: 3006 after = None 3007 3008 if self._match_text_seq("PATTERN"): 3009 self._match_l_paren() 3010 3011 if not self._curr: 3012 self.raise_error("Expecting )", self._curr) 3013 3014 paren = 1 3015 start = self._curr 3016 3017 while self._curr and paren > 0: 3018 if self._curr.token_type == TokenType.L_PAREN: 3019 paren += 1 3020 if self._curr.token_type == TokenType.R_PAREN: 3021 paren -= 1 3022 3023 end = self._prev 3024 self._advance() 3025 3026 if paren > 0: 3027 self.raise_error("Expecting )", self._curr) 3028 3029 pattern = exp.var(self._find_sql(start, end)) 3030 else: 3031 pattern = None 3032 3033 define = ( 3034 self._parse_csv(self._parse_name_as_expression) 3035 if self._match_text_seq("DEFINE") 3036 else None 3037 ) 3038 3039 self._match_r_paren() 3040 3041 return self.expression( 3042 exp.MatchRecognize, 3043 partition_by=partition, 3044 order=order, 3045 measures=measures, 3046 rows=rows, 3047 after=after, 3048 pattern=pattern, 3049 define=define, 3050 alias=self._parse_table_alias(), 3051 ) 3052 3053 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3054 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3055 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3056 cross_apply = False 3057 3058 if cross_apply is not None: 3059 this = self._parse_select(table=True) 3060 view = None 3061 outer = None 3062 elif self._match(TokenType.LATERAL): 3063 this = self._parse_select(table=True) 3064 view = self._match(TokenType.VIEW) 3065 outer = self._match(TokenType.OUTER) 3066 else: 3067 return None 3068 3069 if not this: 3070 this = ( 3071 self._parse_unnest() 3072 or self._parse_function() 3073 or self._parse_id_var(any_token=False) 3074 ) 3075 3076 while self._match(TokenType.DOT): 3077 this = exp.Dot( 3078 this=this, 3079 expression=self._parse_function() or self._parse_id_var(any_token=False), 3080 ) 3081 3082 if view: 3083 table = self._parse_id_var(any_token=False) 3084 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3085 table_alias: t.Optional[exp.TableAlias] = self.expression( 3086 exp.TableAlias, this=table, columns=columns 3087 ) 3088 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3089 # We move the alias from the lateral's child node to 
the lateral itself 3090 table_alias = this.args["alias"].pop() 3091 else: 3092 table_alias = self._parse_table_alias() 3093 3094 return self.expression( 3095 exp.Lateral, 3096 this=this, 3097 view=view, 3098 outer=outer, 3099 alias=table_alias, 3100 cross_apply=cross_apply, 3101 ) 3102 3103 def _parse_join_parts( 3104 self, 3105 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3106 return ( 3107 self._match_set(self.JOIN_METHODS) and self._prev, 3108 self._match_set(self.JOIN_SIDES) and self._prev, 3109 self._match_set(self.JOIN_KINDS) and self._prev, 3110 ) 3111 3112 def _parse_join( 3113 self, skip_join_token: bool = False, parse_bracket: bool = False 3114 ) -> t.Optional[exp.Join]: 3115 if self._match(TokenType.COMMA): 3116 return self.expression(exp.Join, this=self._parse_table()) 3117 3118 index = self._index 3119 method, side, kind = self._parse_join_parts() 3120 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3121 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3122 3123 if not skip_join_token and not join: 3124 self._retreat(index) 3125 kind = None 3126 method = None 3127 side = None 3128 3129 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3130 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3131 3132 if not skip_join_token and not join and not outer_apply and not cross_apply: 3133 return None 3134 3135 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3136 3137 if method: 3138 kwargs["method"] = method.text 3139 if side: 3140 kwargs["side"] = side.text 3141 if kind: 3142 kwargs["kind"] = kind.text 3143 if hint: 3144 kwargs["hint"] = hint 3145 3146 if self._match(TokenType.MATCH_CONDITION): 3147 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3148 3149 if self._match(TokenType.ON): 3150 kwargs["on"] = self._parse_assignment() 3151 elif self._match(TokenType.USING): 3152 kwargs["using"] = self._parse_wrapped_id_vars() 3153 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3154 kind and kind.token_type == TokenType.CROSS 3155 ): 3156 index = self._index 3157 joins: t.Optional[list] = list(self._parse_joins()) 3158 3159 if joins and self._match(TokenType.ON): 3160 kwargs["on"] = self._parse_assignment() 3161 elif joins and self._match(TokenType.USING): 3162 kwargs["using"] = self._parse_wrapped_id_vars() 3163 else: 3164 joins = None 3165 self._retreat(index) 3166 3167 kwargs["this"].set("joins", joins if joins else None) 3168 3169 comments = [c for token in (method, side, kind) if token for c in token.comments] 3170 return self.expression(exp.Join, comments=comments, **kwargs) 3171 3172 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3173 this = self._parse_assignment() 3174 3175 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3176 return this 3177 3178 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3179 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3180 3181 return this 3182 3183 def _parse_index_params(self) -> exp.IndexParameters: 3184 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3185 3186 if self._match(TokenType.L_PAREN, advance=False): 3187 columns = self._parse_wrapped_csv(self._parse_with_operator) 3188 else: 3189 columns = None 3190 3191 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3192 partition_by = 
self._parse_partition_by() 3193 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3194 tablespace = ( 3195 self._parse_var(any_token=True) 3196 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3197 else None 3198 ) 3199 where = self._parse_where() 3200 3201 on = self._parse_field() if self._match(TokenType.ON) else None 3202 3203 return self.expression( 3204 exp.IndexParameters, 3205 using=using, 3206 columns=columns, 3207 include=include, 3208 partition_by=partition_by, 3209 where=where, 3210 with_storage=with_storage, 3211 tablespace=tablespace, 3212 on=on, 3213 ) 3214 3215 def _parse_index( 3216 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3217 ) -> t.Optional[exp.Index]: 3218 if index or anonymous: 3219 unique = None 3220 primary = None 3221 amp = None 3222 3223 self._match(TokenType.ON) 3224 self._match(TokenType.TABLE) # hive 3225 table = self._parse_table_parts(schema=True) 3226 else: 3227 unique = self._match(TokenType.UNIQUE) 3228 primary = self._match_text_seq("PRIMARY") 3229 amp = self._match_text_seq("AMP") 3230 3231 if not self._match(TokenType.INDEX): 3232 return None 3233 3234 index = self._parse_id_var() 3235 table = None 3236 3237 params = self._parse_index_params() 3238 3239 return self.expression( 3240 exp.Index, 3241 this=index, 3242 table=table, 3243 unique=unique, 3244 primary=primary, 3245 amp=amp, 3246 params=params, 3247 ) 3248 3249 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3250 hints: t.List[exp.Expression] = [] 3251 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3252 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3253 hints.append( 3254 self.expression( 3255 exp.WithTableHint, 3256 expressions=self._parse_csv( 3257 lambda: self._parse_function() or self._parse_var(any_token=True) 3258 ), 3259 ) 3260 ) 3261 self._match_r_paren() 3262 else: 3263 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3264 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3265 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3266 3267 self._match_set((TokenType.INDEX, TokenType.KEY)) 3268 if self._match(TokenType.FOR): 3269 hint.set("target", self._advance_any() and self._prev.text.upper()) 3270 3271 hint.set("expressions", self._parse_wrapped_id_vars()) 3272 hints.append(hint) 3273 3274 return hints or None 3275 3276 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3277 return ( 3278 (not schema and self._parse_function(optional_parens=False)) 3279 or self._parse_id_var(any_token=False) 3280 or self._parse_string_as_identifier() 3281 or self._parse_placeholder() 3282 ) 3283 3284 def _parse_table_parts( 3285 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3286 ) -> exp.Table: 3287 catalog = None 3288 db = None 3289 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3290 3291 while self._match(TokenType.DOT): 3292 if catalog: 3293 # This allows nesting the table in arbitrarily many dot expressions if needed 3294 table = self.expression( 3295 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3296 ) 3297 else: 3298 catalog = db 3299 db = table 3300 # "" used for tsql FROM a..b case 3301 table = self._parse_table_part(schema=schema) or "" 3302 3303 if ( 3304 wildcard 3305 and self._is_connected() 3306 and (isinstance(table, exp.Identifier) or not table) 3307 and self._match(TokenType.STAR) 3308 ): 3309 if 
isinstance(table, exp.Identifier): 3310 table.args["this"] += "*" 3311 else: 3312 table = exp.Identifier(this="*") 3313 3314 # We bubble up comments from the Identifier to the Table 3315 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3316 3317 if is_db_reference: 3318 catalog = db 3319 db = table 3320 table = None 3321 3322 if not table and not is_db_reference: 3323 self.raise_error(f"Expected table name but got {self._curr}") 3324 if not db and is_db_reference: 3325 self.raise_error(f"Expected database name but got {self._curr}") 3326 3327 return self.expression( 3328 exp.Table, 3329 comments=comments, 3330 this=table, 3331 db=db, 3332 catalog=catalog, 3333 pivots=self._parse_pivots(), 3334 ) 3335 3336 def _parse_table( 3337 self, 3338 schema: bool = False, 3339 joins: bool = False, 3340 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3341 parse_bracket: bool = False, 3342 is_db_reference: bool = False, 3343 parse_partition: bool = False, 3344 ) -> t.Optional[exp.Expression]: 3345 lateral = self._parse_lateral() 3346 if lateral: 3347 return lateral 3348 3349 unnest = self._parse_unnest() 3350 if unnest: 3351 return unnest 3352 3353 values = self._parse_derived_table_values() 3354 if values: 3355 return values 3356 3357 subquery = self._parse_select(table=True) 3358 if subquery: 3359 if not subquery.args.get("pivots"): 3360 subquery.set("pivots", self._parse_pivots()) 3361 return subquery 3362 3363 bracket = parse_bracket and self._parse_bracket(None) 3364 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3365 3366 only = self._match(TokenType.ONLY) 3367 3368 this = t.cast( 3369 exp.Expression, 3370 bracket 3371 or self._parse_bracket( 3372 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3373 ), 3374 ) 3375 3376 if only: 3377 this.set("only", only) 3378 3379 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3380 self._match_text_seq("*") 3381 3382 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3383 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3384 this.set("partition", self._parse_partition()) 3385 3386 if schema: 3387 return self._parse_schema(this=this) 3388 3389 version = self._parse_version() 3390 3391 if version: 3392 this.set("version", version) 3393 3394 if self.dialect.ALIAS_POST_TABLESAMPLE: 3395 table_sample = self._parse_table_sample() 3396 3397 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3398 if alias: 3399 this.set("alias", alias) 3400 3401 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3402 return self.expression( 3403 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3404 ) 3405 3406 this.set("hints", self._parse_table_hints()) 3407 3408 if not this.args.get("pivots"): 3409 this.set("pivots", self._parse_pivots()) 3410 3411 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3412 table_sample = self._parse_table_sample() 3413 3414 if table_sample: 3415 table_sample.set("this", this) 3416 this = table_sample 3417 3418 if joins: 3419 for join in self._parse_joins(): 3420 this.append("joins", join) 3421 3422 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3423 this.set("ordinality", True) 3424 this.set("alias", self._parse_table_alias()) 3425 3426 return this 3427 3428 def _parse_version(self) -> t.Optional[exp.Version]: 3429 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3430 this = "TIMESTAMP" 3431 elif 
self._match(TokenType.VERSION_SNAPSHOT): 3432 this = "VERSION" 3433 else: 3434 return None 3435 3436 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3437 kind = self._prev.text.upper() 3438 start = self._parse_bitwise() 3439 self._match_texts(("TO", "AND")) 3440 end = self._parse_bitwise() 3441 expression: t.Optional[exp.Expression] = self.expression( 3442 exp.Tuple, expressions=[start, end] 3443 ) 3444 elif self._match_text_seq("CONTAINED", "IN"): 3445 kind = "CONTAINED IN" 3446 expression = self.expression( 3447 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3448 ) 3449 elif self._match(TokenType.ALL): 3450 kind = "ALL" 3451 expression = None 3452 else: 3453 self._match_text_seq("AS", "OF") 3454 kind = "AS OF" 3455 expression = self._parse_type() 3456 3457 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3458 3459 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3460 if not self._match(TokenType.UNNEST): 3461 return None 3462 3463 expressions = self._parse_wrapped_csv(self._parse_equality) 3464 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3465 3466 alias = self._parse_table_alias() if with_alias else None 3467 3468 if alias: 3469 if self.dialect.UNNEST_COLUMN_ONLY: 3470 if alias.args.get("columns"): 3471 self.raise_error("Unexpected extra column alias in unnest.") 3472 3473 alias.set("columns", [alias.this]) 3474 alias.set("this", None) 3475 3476 columns = alias.args.get("columns") or [] 3477 if offset and len(expressions) < len(columns): 3478 offset = columns.pop() 3479 3480 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3481 self._match(TokenType.ALIAS) 3482 offset = self._parse_id_var( 3483 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3484 ) or exp.to_identifier("offset") 3485 3486 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3487 3488 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3489 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3490 if not is_derived and not self._match_text_seq("VALUES"): 3491 return None 3492 3493 expressions = self._parse_csv(self._parse_value) 3494 alias = self._parse_table_alias() 3495 3496 if is_derived: 3497 self._match_r_paren() 3498 3499 return self.expression( 3500 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3501 ) 3502 3503 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3504 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3505 as_modifier and self._match_text_seq("USING", "SAMPLE") 3506 ): 3507 return None 3508 3509 bucket_numerator = None 3510 bucket_denominator = None 3511 bucket_field = None 3512 percent = None 3513 size = None 3514 seed = None 3515 3516 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3517 matched_l_paren = self._match(TokenType.L_PAREN) 3518 3519 if self.TABLESAMPLE_CSV: 3520 num = None 3521 expressions = self._parse_csv(self._parse_primary) 3522 else: 3523 expressions = None 3524 num = ( 3525 self._parse_factor() 3526 if self._match(TokenType.NUMBER, advance=False) 3527 else self._parse_primary() or self._parse_placeholder() 3528 ) 3529 3530 if self._match_text_seq("BUCKET"): 3531 bucket_numerator = self._parse_number() 3532 self._match_text_seq("OUT", "OF") 3533 bucket_denominator = self._parse_number() 3534 self._match(TokenType.ON) 3535 bucket_field = self._parse_field() 3536 elif 
self._match_set((TokenType.PERCENT, TokenType.MOD)): 3537 percent = num 3538 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3539 size = num 3540 else: 3541 percent = num 3542 3543 if matched_l_paren: 3544 self._match_r_paren() 3545 3546 if self._match(TokenType.L_PAREN): 3547 method = self._parse_var(upper=True) 3548 seed = self._match(TokenType.COMMA) and self._parse_number() 3549 self._match_r_paren() 3550 elif self._match_texts(("SEED", "REPEATABLE")): 3551 seed = self._parse_wrapped(self._parse_number) 3552 3553 if not method and self.DEFAULT_SAMPLING_METHOD: 3554 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3555 3556 return self.expression( 3557 exp.TableSample, 3558 expressions=expressions, 3559 method=method, 3560 bucket_numerator=bucket_numerator, 3561 bucket_denominator=bucket_denominator, 3562 bucket_field=bucket_field, 3563 percent=percent, 3564 size=size, 3565 seed=seed, 3566 ) 3567 3568 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3569 return list(iter(self._parse_pivot, None)) or None 3570 3571 def _parse_joins(self) -> t.Iterator[exp.Join]: 3572 return iter(self._parse_join, None) 3573 3574 # https://duckdb.org/docs/sql/statements/pivot 3575 def _parse_simplified_pivot(self) -> exp.Pivot: 3576 def _parse_on() -> t.Optional[exp.Expression]: 3577 this = self._parse_bitwise() 3578 return self._parse_in(this) if self._match(TokenType.IN) else this 3579 3580 this = self._parse_table() 3581 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3582 using = self._match(TokenType.USING) and self._parse_csv( 3583 lambda: self._parse_alias(self._parse_function()) 3584 ) 3585 group = self._parse_group() 3586 return self.expression( 3587 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3588 ) 3589 3590 def _parse_pivot_in(self) -> exp.In: 3591 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3592 this = self._parse_assignment() 3593 3594 self._match(TokenType.ALIAS) 3595 alias = self._parse_field() 3596 if alias: 3597 return self.expression(exp.PivotAlias, this=this, alias=alias) 3598 3599 return this 3600 3601 value = self._parse_column() 3602 3603 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3604 self.raise_error("Expecting IN (") 3605 3606 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3607 3608 self._match_r_paren() 3609 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3610 3611 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3612 index = self._index 3613 include_nulls = None 3614 3615 if self._match(TokenType.PIVOT): 3616 unpivot = False 3617 elif self._match(TokenType.UNPIVOT): 3618 unpivot = True 3619 3620 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3621 if self._match_text_seq("INCLUDE", "NULLS"): 3622 include_nulls = True 3623 elif self._match_text_seq("EXCLUDE", "NULLS"): 3624 include_nulls = False 3625 else: 3626 return None 3627 3628 expressions = [] 3629 3630 if not self._match(TokenType.L_PAREN): 3631 self._retreat(index) 3632 return None 3633 3634 if unpivot: 3635 expressions = self._parse_csv(self._parse_column) 3636 else: 3637 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3638 3639 if not expressions: 3640 self.raise_error("Failed to parse PIVOT's aggregation list") 3641 3642 if not self._match(TokenType.FOR): 3643 self.raise_error("Expecting FOR") 3644 3645 field = self._parse_pivot_in() 3646 3647 self._match_r_paren() 
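# NOTE (illustrative aside, not part of the original source): at this point
# the aggregation list and the FOR ... IN (...) field have been consumed, and
# the exp.Pivot node is assembled below. A minimal sketch, assuming the
# Snowflake dialect; the SQL text is an assumption for illustration:
#
#     >>> import sqlglot
#     >>> from sqlglot import exp
#     >>> sql = "SELECT * FROM t PIVOT (SUM(v) FOR k IN ('a', 'b'))"
#     >>> sqlglot.parse_one(sql, read="snowflake").find(exp.Pivot) is not None
#     True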
3648 3649 pivot = self.expression( 3650 exp.Pivot, 3651 expressions=expressions, 3652 field=field, 3653 unpivot=unpivot, 3654 include_nulls=include_nulls, 3655 ) 3656 3657 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3658 pivot.set("alias", self._parse_table_alias()) 3659 3660 if not unpivot: 3661 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3662 3663 columns: t.List[exp.Expression] = [] 3664 for fld in pivot.args["field"].expressions: 3665 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3666 for name in names: 3667 if self.PREFIXED_PIVOT_COLUMNS: 3668 name = f"{name}_{field_name}" if name else field_name 3669 else: 3670 name = f"{field_name}_{name}" if name else field_name 3671 3672 columns.append(exp.to_identifier(name)) 3673 3674 pivot.set("columns", columns) 3675 3676 return pivot 3677 3678 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3679 return [agg.alias for agg in aggregations] 3680 3681 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3682 if not skip_where_token and not self._match(TokenType.PREWHERE): 3683 return None 3684 3685 return self.expression( 3686 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 3687 ) 3688 3689 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3690 if not skip_where_token and not self._match(TokenType.WHERE): 3691 return None 3692 3693 return self.expression( 3694 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 3695 ) 3696 3697 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3698 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3699 return None 3700 3701 elements: t.Dict[str, t.Any] = defaultdict(list) 3702 3703 if self._match(TokenType.ALL): 3704 elements["all"] = True 3705 elif self._match(TokenType.DISTINCT): 3706 elements["all"] = False 3707 3708 while True: 3709 expressions = self._parse_csv( 3710 lambda: None 3711 if self._match(TokenType.ROLLUP, advance=False) 3712 else self._parse_assignment() 3713 ) 3714 if expressions: 3715 elements["expressions"].extend(expressions) 3716 3717 grouping_sets = self._parse_grouping_sets() 3718 if grouping_sets: 3719 elements["grouping_sets"].extend(grouping_sets) 3720 3721 rollup = None 3722 cube = None 3723 totals = None 3724 3725 index = self._index 3726 with_ = self._match(TokenType.WITH) 3727 if self._match(TokenType.ROLLUP): 3728 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3729 elements["rollup"].extend(ensure_list(rollup)) 3730 3731 if self._match(TokenType.CUBE): 3732 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3733 elements["cube"].extend(ensure_list(cube)) 3734 3735 if self._match_text_seq("TOTALS"): 3736 totals = True 3737 elements["totals"] = True # type: ignore 3738 3739 if not (grouping_sets or rollup or cube or totals): 3740 if with_: 3741 self._retreat(index) 3742 break 3743 3744 return self.expression(exp.Group, **elements) # type: ignore 3745 3746 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3747 if not self._match(TokenType.GROUPING_SETS): 3748 return None 3749 3750 return self._parse_wrapped_csv(self._parse_grouping_set) 3751 3752 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3753 if self._match(TokenType.L_PAREN): 3754 grouping_set = self._parse_csv(self._parse_column) 3755 self._match_r_paren() 3756 return 
self.expression(exp.Tuple, expressions=grouping_set) 3757 3758 return self._parse_column() 3759 3760 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3761 if not skip_having_token and not self._match(TokenType.HAVING): 3762 return None 3763 return self.expression(exp.Having, this=self._parse_assignment()) 3764 3765 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3766 if not self._match(TokenType.QUALIFY): 3767 return None 3768 return self.expression(exp.Qualify, this=self._parse_assignment()) 3769 3770 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3771 if skip_start_token: 3772 start = None 3773 elif self._match(TokenType.START_WITH): 3774 start = self._parse_assignment() 3775 else: 3776 return None 3777 3778 self._match(TokenType.CONNECT_BY) 3779 nocycle = self._match_text_seq("NOCYCLE") 3780 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3781 exp.Prior, this=self._parse_bitwise() 3782 ) 3783 connect = self._parse_assignment() 3784 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3785 3786 if not start and self._match(TokenType.START_WITH): 3787 start = self._parse_assignment() 3788 3789 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3790 3791 def _parse_name_as_expression(self) -> exp.Alias: 3792 return self.expression( 3793 exp.Alias, 3794 alias=self._parse_id_var(any_token=True), 3795 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 3796 ) 3797 3798 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3799 if self._match_text_seq("INTERPOLATE"): 3800 return self._parse_wrapped_csv(self._parse_name_as_expression) 3801 return None 3802 3803 def _parse_order( 3804 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3805 ) -> t.Optional[exp.Expression]: 3806 siblings = None 3807 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3808 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3809 return this 3810 3811 siblings = True 3812 3813 return self.expression( 3814 exp.Order, 3815 this=this, 3816 expressions=self._parse_csv(self._parse_ordered), 3817 interpolate=self._parse_interpolate(), 3818 siblings=siblings, 3819 ) 3820 3821 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3822 if not self._match(token): 3823 return None 3824 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3825 3826 def _parse_ordered( 3827 self, parse_method: t.Optional[t.Callable] = None 3828 ) -> t.Optional[exp.Ordered]: 3829 this = parse_method() if parse_method else self._parse_assignment() 3830 if not this: 3831 return None 3832 3833 asc = self._match(TokenType.ASC) 3834 desc = self._match(TokenType.DESC) or (asc and False) 3835 3836 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3837 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3838 3839 nulls_first = is_nulls_first or False 3840 explicitly_null_ordered = is_nulls_first or is_nulls_last 3841 3842 if ( 3843 not explicitly_null_ordered 3844 and ( 3845 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3846 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3847 ) 3848 and self.dialect.NULL_ORDERING != "nulls_are_last" 3849 ): 3850 nulls_first = True 3851 3852 if self._match_text_seq("WITH", "FILL"): 3853 with_fill = self.expression( 3854 exp.WithFill, 3855 **{ # type: ignore 3856 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3857 "to": 
self._match_text_seq("TO") and self._parse_bitwise(), 3858 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3859 }, 3860 ) 3861 else: 3862 with_fill = None 3863 3864 return self.expression( 3865 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3866 ) 3867 3868 def _parse_limit( 3869 self, 3870 this: t.Optional[exp.Expression] = None, 3871 top: bool = False, 3872 skip_limit_token: bool = False, 3873 ) -> t.Optional[exp.Expression]: 3874 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3875 comments = self._prev_comments 3876 if top: 3877 limit_paren = self._match(TokenType.L_PAREN) 3878 expression = self._parse_term() if limit_paren else self._parse_number() 3879 3880 if limit_paren: 3881 self._match_r_paren() 3882 else: 3883 expression = self._parse_term() 3884 3885 if self._match(TokenType.COMMA): 3886 offset = expression 3887 expression = self._parse_term() 3888 else: 3889 offset = None 3890 3891 limit_exp = self.expression( 3892 exp.Limit, 3893 this=this, 3894 expression=expression, 3895 offset=offset, 3896 comments=comments, 3897 expressions=self._parse_limit_by(), 3898 ) 3899 3900 return limit_exp 3901 3902 if self._match(TokenType.FETCH): 3903 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3904 direction = self._prev.text.upper() if direction else "FIRST" 3905 3906 count = self._parse_field(tokens=self.FETCH_TOKENS) 3907 percent = self._match(TokenType.PERCENT) 3908 3909 self._match_set((TokenType.ROW, TokenType.ROWS)) 3910 3911 only = self._match_text_seq("ONLY") 3912 with_ties = self._match_text_seq("WITH", "TIES") 3913 3914 if only and with_ties: 3915 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3916 3917 return self.expression( 3918 exp.Fetch, 3919 direction=direction, 3920 count=count, 3921 percent=percent, 3922 with_ties=with_ties, 3923 ) 3924 3925 return this 3926 3927 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3928 if not self._match(TokenType.OFFSET): 3929 return this 3930 3931 count = self._parse_term() 3932 self._match_set((TokenType.ROW, TokenType.ROWS)) 3933 3934 return self.expression( 3935 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3936 ) 3937 3938 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3939 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3940 3941 def _parse_locks(self) -> t.List[exp.Lock]: 3942 locks = [] 3943 while True: 3944 if self._match_text_seq("FOR", "UPDATE"): 3945 update = True 3946 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3947 "LOCK", "IN", "SHARE", "MODE" 3948 ): 3949 update = False 3950 else: 3951 break 3952 3953 expressions = None 3954 if self._match_text_seq("OF"): 3955 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3956 3957 wait: t.Optional[bool | exp.Expression] = None 3958 if self._match_text_seq("NOWAIT"): 3959 wait = True 3960 elif self._match_text_seq("WAIT"): 3961 wait = self._parse_primary() 3962 elif self._match_text_seq("SKIP", "LOCKED"): 3963 wait = False 3964 3965 locks.append( 3966 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3967 ) 3968 3969 return locks 3970 3971 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3972 while this and self._match_set(self.SET_OPERATIONS): 3973 token_type = self._prev.token_type 3974 3975 if token_type == TokenType.UNION: 3976 
operation: t.Type[exp.SetOperation] = exp.Union 3977 elif token_type == TokenType.EXCEPT: 3978 operation = exp.Except 3979 else: 3980 operation = exp.Intersect 3981 3982 comments = self._prev.comments 3983 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3984 by_name = self._match_text_seq("BY", "NAME") 3985 expression = self._parse_select(nested=True, parse_set_operation=False) 3986 3987 this = self.expression( 3988 operation, 3989 comments=comments, 3990 this=this, 3991 distinct=distinct, 3992 by_name=by_name, 3993 expression=expression, 3994 ) 3995 3996 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 3997 expression = this.expression 3998 3999 if expression: 4000 for arg in self.SET_OP_MODIFIERS: 4001 expr = expression.args.get(arg) 4002 if expr: 4003 this.set(arg, expr.pop()) 4004 4005 return this 4006 4007 def _parse_expression(self) -> t.Optional[exp.Expression]: 4008 return self._parse_alias(self._parse_assignment()) 4009 4010 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4011 this = self._parse_disjunction() 4012 4013 while self._match_set(self.ASSIGNMENT): 4014 this = self.expression( 4015 self.ASSIGNMENT[self._prev.token_type], 4016 this=this, 4017 comments=self._prev_comments, 4018 expression=self._parse_assignment(), 4019 ) 4020 4021 return this 4022 4023 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4024 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4025 4026 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4027 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4028 4029 def _parse_equality(self) -> t.Optional[exp.Expression]: 4030 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4031 4032 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4033 return self._parse_tokens(self._parse_range, self.COMPARISON) 4034 4035 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4036 this = this or self._parse_bitwise() 4037 negate = self._match(TokenType.NOT) 4038 4039 if self._match_set(self.RANGE_PARSERS): 4040 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4041 if not expression: 4042 return this 4043 4044 this = expression 4045 elif self._match(TokenType.ISNULL): 4046 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4047 4048 # Postgres supports ISNULL and NOTNULL for conditions. 
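# e.g. x ISNULL parses to Is(this=x, expression=Null()), and x NOTNULL (handled
# below) to Not(this=Is(this=x, expression=Null())).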
4049 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4050 if self._match(TokenType.NOTNULL): 4051 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4052 this = self.expression(exp.Not, this=this) 4053 4054 if negate: 4055 this = self.expression(exp.Not, this=this) 4056 4057 if self._match(TokenType.IS): 4058 this = self._parse_is(this) 4059 4060 return this 4061 4062 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4063 index = self._index - 1 4064 negate = self._match(TokenType.NOT) 4065 4066 if self._match_text_seq("DISTINCT", "FROM"): 4067 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4068 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4069 4070 expression = self._parse_null() or self._parse_boolean() 4071 if not expression: 4072 self._retreat(index) 4073 return None 4074 4075 this = self.expression(exp.Is, this=this, expression=expression) 4076 return self.expression(exp.Not, this=this) if negate else this 4077 4078 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4079 unnest = self._parse_unnest(with_alias=False) 4080 if unnest: 4081 this = self.expression(exp.In, this=this, unnest=unnest) 4082 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4083 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4084 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4085 4086 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4087 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4088 else: 4089 this = self.expression(exp.In, this=this, expressions=expressions) 4090 4091 if matched_l_paren: 4092 self._match_r_paren(this) 4093 elif not self._match(TokenType.R_BRACKET, expression=this): 4094 self.raise_error("Expecting ]") 4095 else: 4096 this = self.expression(exp.In, this=this, field=self._parse_field()) 4097 4098 return this 4099 4100 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4101 low = self._parse_bitwise() 4102 self._match(TokenType.AND) 4103 high = self._parse_bitwise() 4104 return self.expression(exp.Between, this=this, low=low, high=high) 4105 4106 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4107 if not self._match(TokenType.ESCAPE): 4108 return this 4109 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4110 4111 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4112 index = self._index 4113 4114 if not self._match(TokenType.INTERVAL) and match_interval: 4115 return None 4116 4117 if self._match(TokenType.STRING, advance=False): 4118 this = self._parse_primary() 4119 else: 4120 this = self._parse_term() 4121 4122 if not this or ( 4123 isinstance(this, exp.Column) 4124 and not this.table 4125 and not this.this.quoted 4126 and this.name.upper() == "IS" 4127 ): 4128 self._retreat(index) 4129 return None 4130 4131 unit = self._parse_function() or ( 4132 not self._match(TokenType.ALIAS, advance=False) 4133 and self._parse_var(any_token=True, upper=True) 4134 ) 4135 4136 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4137 # each INTERVAL expression into this canonical form so it's easy to transpile 4138 if this and this.is_number: 4139 this = exp.Literal.string(this.to_py()) 4140 elif this and this.is_string: 4141 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4142 if 
len(parts) == 1: 4143 if unit: 4144 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4145 self._retreat(self._index - 1) 4146 4147 this = exp.Literal.string(parts[0][0]) 4148 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4149 4150 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4151 unit = self.expression( 4152 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4153 ) 4154 4155 interval = self.expression(exp.Interval, this=this, unit=unit) 4156 4157 index = self._index 4158 self._match(TokenType.PLUS) 4159 4160 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4161 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4162 return self.expression( 4163 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4164 ) 4165 4166 self._retreat(index) 4167 return interval 4168 4169 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4170 this = self._parse_term() 4171 4172 while True: 4173 if self._match_set(self.BITWISE): 4174 this = self.expression( 4175 self.BITWISE[self._prev.token_type], 4176 this=this, 4177 expression=self._parse_term(), 4178 ) 4179 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4180 this = self.expression( 4181 exp.DPipe, 4182 this=this, 4183 expression=self._parse_term(), 4184 safe=not self.dialect.STRICT_STRING_CONCAT, 4185 ) 4186 elif self._match(TokenType.DQMARK): 4187 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4188 elif self._match_pair(TokenType.LT, TokenType.LT): 4189 this = self.expression( 4190 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4191 ) 4192 elif self._match_pair(TokenType.GT, TokenType.GT): 4193 this = self.expression( 4194 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4195 ) 4196 else: 4197 break 4198 4199 return this 4200 4201 def _parse_term(self) -> t.Optional[exp.Expression]: 4202 return self._parse_tokens(self._parse_factor, self.TERM) 4203 4204 def _parse_factor(self) -> t.Optional[exp.Expression]: 4205 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4206 this = parse_method() 4207 4208 while self._match_set(self.FACTOR): 4209 klass = self.FACTOR[self._prev.token_type] 4210 comments = self._prev_comments 4211 expression = parse_method() 4212 4213 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 4214 self._retreat(self._index - 1) 4215 return this 4216 4217 this = self.expression(klass, this=this, comments=comments, expression=expression) 4218 4219 if isinstance(this, exp.Div): 4220 this.args["typed"] = self.dialect.TYPED_DIVISION 4221 this.args["safe"] = self.dialect.SAFE_DIVISION 4222 4223 return this 4224 4225 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4226 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4227 4228 def _parse_unary(self) -> t.Optional[exp.Expression]: 4229 if self._match_set(self.UNARY_PARSERS): 4230 return self.UNARY_PARSERS[self._prev.token_type](self) 4231 return self._parse_at_time_zone(self._parse_type()) 4232 4233 def _parse_type( 4234 self, parse_interval: bool = True, fallback_to_identifier: bool = False 4235 ) -> t.Optional[exp.Expression]: 4236 interval = parse_interval and self._parse_interval() 4237 if interval: 4238 return interval 4239 4240 index = self._index 4241 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4242 4243 if data_type: 4244 index2 = 
self._index 4245 this = self._parse_primary() 4246 4247 if isinstance(this, exp.Literal): 4248 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4249 if parser: 4250 return parser(self, this, data_type) 4251 4252 return self.expression(exp.Cast, this=this, to=data_type) 4253 4254 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 4255 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 4256 # 4257 # If the index difference here is greater than 1, that means the parser itself must have 4258 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 4259 # 4260 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 4261 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 4262 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 4263 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 4264 # 4265 # In these cases, we don't really want to return the converted type, but instead retreat 4266 # and try to parse a Column or Identifier in the section below. 4267 if data_type.expressions and index2 - index > 1: 4268 self._retreat(index2) 4269 return self._parse_column_ops(data_type) 4270 4271 self._retreat(index) 4272 4273 if fallback_to_identifier: 4274 return self._parse_id_var() 4275 4276 this = self._parse_column() 4277 return this and self._parse_column_ops(this) 4278 4279 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4280 this = self._parse_type() 4281 if not this: 4282 return None 4283 4284 if isinstance(this, exp.Column) and not this.table: 4285 this = exp.var(this.name.upper()) 4286 4287 return self.expression( 4288 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4289 ) 4290 4291 def _parse_types( 4292 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4293 ) -> t.Optional[exp.Expression]: 4294 index = self._index 4295 4296 this: t.Optional[exp.Expression] = None 4297 prefix = self._match_text_seq("SYSUDTLIB", ".") 4298 4299 if not self._match_set(self.TYPE_TOKENS): 4300 identifier = allow_identifiers and self._parse_id_var( 4301 any_token=False, tokens=(TokenType.VAR,) 4302 ) 4303 if isinstance(identifier, exp.Identifier): 4304 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4305 4306 if len(tokens) != 1: 4307 self.raise_error("Unexpected identifier", self._prev) 4308 4309 if tokens[0].token_type in self.TYPE_TOKENS: 4310 self._prev = tokens[0] 4311 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4312 type_name = identifier.name 4313 4314 while self._match(TokenType.DOT): 4315 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4316 4317 this = exp.DataType.build(type_name, udt=True) 4318 else: 4319 self._retreat(self._index - 1) 4320 return None 4321 else: 4322 return None 4323 4324 type_token = self._prev.token_type 4325 4326 if type_token == TokenType.PSEUDO_TYPE: 4327 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4328 4329 if type_token == TokenType.OBJECT_IDENTIFIER: 4330 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4331 4332 # https://materialize.com/docs/sql/types/map/ 4333 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4334 key_type = self._parse_types( 4335 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4336 ) 4337 if not
self._match(TokenType.FARROW): 4338 self._retreat(index) 4339 return None 4340 4341 value_type = self._parse_types( 4342 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4343 ) 4344 if not self._match(TokenType.R_BRACKET): 4345 self._retreat(index) 4346 return None 4347 4348 return exp.DataType( 4349 this=exp.DataType.Type.MAP, 4350 expressions=[key_type, value_type], 4351 nested=True, 4352 prefix=prefix, 4353 ) 4354 4355 nested = type_token in self.NESTED_TYPE_TOKENS 4356 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4357 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4358 expressions = None 4359 maybe_func = False 4360 4361 if self._match(TokenType.L_PAREN): 4362 if is_struct: 4363 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4364 elif nested: 4365 expressions = self._parse_csv( 4366 lambda: self._parse_types( 4367 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4368 ) 4369 ) 4370 elif type_token in self.ENUM_TYPE_TOKENS: 4371 expressions = self._parse_csv(self._parse_equality) 4372 elif is_aggregate: 4373 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4374 any_token=False, tokens=(TokenType.VAR,) 4375 ) 4376 if not func_or_ident or not self._match(TokenType.COMMA): 4377 return None 4378 expressions = self._parse_csv( 4379 lambda: self._parse_types( 4380 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4381 ) 4382 ) 4383 expressions.insert(0, func_or_ident) 4384 else: 4385 expressions = self._parse_csv(self._parse_type_size) 4386 4387 if not expressions or not self._match(TokenType.R_PAREN): 4388 self._retreat(index) 4389 return None 4390 4391 maybe_func = True 4392 4393 values: t.Optional[t.List[exp.Expression]] = None 4394 4395 if nested and self._match(TokenType.LT): 4396 if is_struct: 4397 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4398 else: 4399 expressions = self._parse_csv( 4400 lambda: self._parse_types( 4401 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4402 ) 4403 ) 4404 4405 if not self._match(TokenType.GT): 4406 self.raise_error("Expecting >") 4407 4408 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4409 values = self._parse_csv(self._parse_assignment) 4410 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4411 4412 if type_token in self.TIMESTAMPS: 4413 if self._match_text_seq("WITH", "TIME", "ZONE"): 4414 maybe_func = False 4415 tz_type = ( 4416 exp.DataType.Type.TIMETZ 4417 if type_token in self.TIMES 4418 else exp.DataType.Type.TIMESTAMPTZ 4419 ) 4420 this = exp.DataType(this=tz_type, expressions=expressions) 4421 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4422 maybe_func = False 4423 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4424 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4425 maybe_func = False 4426 elif type_token == TokenType.INTERVAL: 4427 unit = self._parse_var(upper=True) 4428 if unit: 4429 if self._match_text_seq("TO"): 4430 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4431 4432 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4433 else: 4434 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4435 4436 if maybe_func and check_func: 4437 index2 = self._index 4438 peek = self._parse_string() 4439 4440 if not peek: 4441 self._retreat(index) 4442 return None 4443 4444 
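# A string literal right after the parenthesized type suggests a typed literal,
# e.g. the DECIMAL(10, 2) '1.23' shape, rather than a function call, so we
# unconsume the peeked string and keep treating the match as a DataType.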
self._retreat(index2) 4445 4446 if not this: 4447 if self._match_text_seq("UNSIGNED"): 4448 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4449 if not unsigned_type_token: 4450 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4451 4452 type_token = unsigned_type_token or type_token 4453 4454 this = exp.DataType( 4455 this=exp.DataType.Type[type_token.value], 4456 expressions=expressions, 4457 nested=nested, 4458 values=values, 4459 prefix=prefix, 4460 ) 4461 elif expressions: 4462 this.set("expressions", expressions) 4463 4464 # https://materialize.com/docs/sql/types/list/#type-name 4465 while self._match(TokenType.LIST): 4466 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4467 4468 index = self._index 4469 4470 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4471 matched_array = self._match(TokenType.ARRAY) 4472 4473 while self._curr: 4474 matched_l_bracket = self._match(TokenType.L_BRACKET) 4475 if not matched_l_bracket and not matched_array: 4476 break 4477 4478 matched_array = False 4479 values = self._parse_csv(self._parse_assignment) or None 4480 if values and not schema: 4481 self._retreat(index) 4482 break 4483 4484 this = exp.DataType( 4485 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4486 ) 4487 self._match(TokenType.R_BRACKET) 4488 4489 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4490 converter = self.TYPE_CONVERTERS.get(this.this) 4491 if converter: 4492 this = converter(t.cast(exp.DataType, this)) 4493 4494 return this 4495 4496 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4497 index = self._index 4498 4499 if ( 4500 self._curr 4501 and self._next 4502 and self._curr.token_type in self.TYPE_TOKENS 4503 and self._next.token_type in self.TYPE_TOKENS 4504 ): 4505 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4506 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 4507 this = self._parse_id_var() 4508 else: 4509 this = ( 4510 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4511 or self._parse_id_var() 4512 ) 4513 4514 self._match(TokenType.COLON) 4515 4516 if ( 4517 type_required 4518 and not isinstance(this, exp.DataType) 4519 and not self._match_set(self.TYPE_TOKENS, advance=False) 4520 ): 4521 self._retreat(index) 4522 return self._parse_types() 4523 4524 return self._parse_column_def(this) 4525 4526 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4527 if not self._match_text_seq("AT", "TIME", "ZONE"): 4528 return this 4529 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4530 4531 def _parse_column(self) -> t.Optional[exp.Expression]: 4532 this = self._parse_column_reference() 4533 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4534 4535 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4536 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4537 4538 return column 4539 4540 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4541 this = self._parse_field() 4542 if ( 4543 not this 4544 and self._match(TokenType.VALUES, advance=False) 4545 and self.VALUES_FOLLOWED_BY_PAREN 4546 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4547 ): 4548 this = self._parse_id_var() 4549 4550 if isinstance(this, exp.Identifier): 4551 # We bubble up comments from the Identifier to the Column 4552 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4553 4554 return this 4555 4556 def _parse_colon_as_json_extract( 4557 self, this: t.Optional[exp.Expression] 4558 ) -> t.Optional[exp.Expression]: 4559 casts = [] 4560 json_path = [] 4561 4562 while self._match(TokenType.COLON): 4563 start_index = self._index 4564 4565 # Snowflake allows reserved keywords as JSON keys, but advance_any() won't consume reserved tokens such as TokenType.SELECT even when any_token=True, so we allow it explicitly via `tokens` 4566 path = self._parse_column_ops( 4567 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4568 ) 4569 4570 # The cast :: operator has a lower precedence than the extraction operator :, so 4571 # we rearrange the AST appropriately to avoid casting the JSON path 4572 while isinstance(path, exp.Cast): 4573 casts.append(path.to) 4574 path = path.this 4575 4576 if casts: 4577 dcolon_offset = next( 4578 i 4579 for i, t in enumerate(self._tokens[start_index:]) 4580 if t.token_type == TokenType.DCOLON 4581 ) 4582 end_token = self._tokens[start_index + dcolon_offset - 1] 4583 else: 4584 end_token = self._prev 4585 4586 if path: 4587 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 4588 4589 if json_path: 4590 this = self.expression( 4591 exp.JSONExtract, 4592 this=this, 4593 expression=self.dialect.to_json_path(exp.Literal.string(".".join(json_path))), 4594 ) 4595 4596 while casts: 4597 this = self.expression(exp.Cast, this=this, to=casts.pop()) 4598 4599 return this 4600 4601 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 4602 return self._parse_types() 4603 4604 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4605 this = self._parse_bracket(this) 4606 4607 while self._match_set(self.COLUMN_OPERATORS): 4608 op_token = self._prev.token_type 4609 op = self.COLUMN_OPERATORS.get(op_token) 4610 4611 if op_token == TokenType.DCOLON: 4612 field = self._parse_dcolon() 4613 if not field: 4614
self.raise_error("Expected type") 4615 elif op and self._curr: 4616 field = self._parse_column_reference() 4617 else: 4618 field = self._parse_field(any_token=True, anonymous_func=True) 4619 4620 if isinstance(field, exp.Func) and this: 4621 # bigquery allows function calls like x.y.count(...) 4622 # SAFE.SUBSTR(...) 4623 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4624 this = exp.replace_tree( 4625 this, 4626 lambda n: ( 4627 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4628 if n.table 4629 else n.this 4630 ) 4631 if isinstance(n, exp.Column) 4632 else n, 4633 ) 4634 4635 if op: 4636 this = op(self, this, field) 4637 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4638 this = self.expression( 4639 exp.Column, 4640 this=field, 4641 table=this.this, 4642 db=this.args.get("table"), 4643 catalog=this.args.get("db"), 4644 ) 4645 else: 4646 this = self.expression(exp.Dot, this=this, expression=field) 4647 4648 this = self._parse_bracket(this) 4649 4650 return self._parse_colon_as_json_extract(this) if self.COLON_IS_JSON_EXTRACT else this 4651 4652 def _parse_primary(self) -> t.Optional[exp.Expression]: 4653 if self._match_set(self.PRIMARY_PARSERS): 4654 token_type = self._prev.token_type 4655 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4656 4657 if token_type == TokenType.STRING: 4658 expressions = [primary] 4659 while self._match(TokenType.STRING): 4660 expressions.append(exp.Literal.string(self._prev.text)) 4661 4662 if len(expressions) > 1: 4663 return self.expression(exp.Concat, expressions=expressions) 4664 4665 return primary 4666 4667 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4668 return exp.Literal.number(f"0.{self._prev.text}") 4669 4670 if self._match(TokenType.L_PAREN): 4671 comments = self._prev_comments 4672 query = self._parse_select() 4673 4674 if query: 4675 expressions = [query] 4676 else: 4677 expressions = self._parse_expressions() 4678 4679 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4680 4681 if not this and self._match(TokenType.R_PAREN, advance=False): 4682 this = self.expression(exp.Tuple) 4683 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4684 this = self._parse_subquery(this=this, parse_alias=False) 4685 elif isinstance(this, exp.Subquery): 4686 this = self._parse_subquery( 4687 this=self._parse_set_operations(this), parse_alias=False 4688 ) 4689 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4690 this = self.expression(exp.Tuple, expressions=expressions) 4691 else: 4692 this = self.expression(exp.Paren, this=this) 4693 4694 if this: 4695 this.add_comments(comments) 4696 4697 self._match_r_paren(expression=this) 4698 return this 4699 4700 return None 4701 4702 def _parse_field( 4703 self, 4704 any_token: bool = False, 4705 tokens: t.Optional[t.Collection[TokenType]] = None, 4706 anonymous_func: bool = False, 4707 ) -> t.Optional[exp.Expression]: 4708 if anonymous_func: 4709 field = ( 4710 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4711 or self._parse_primary() 4712 ) 4713 else: 4714 field = self._parse_primary() or self._parse_function( 4715 anonymous=anonymous_func, any_token=any_token 4716 ) 4717 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4718 4719 def _parse_function( 4720 self, 4721 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4722 anonymous: bool = False, 4723 optional_parens: bool = True, 4724 any_token: bool = False, 4725 ) -> 
t.Optional[exp.Expression]: 4726 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4727 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4728 fn_syntax = False 4729 if ( 4730 self._match(TokenType.L_BRACE, advance=False) 4731 and self._next 4732 and self._next.text.upper() == "FN" 4733 ): 4734 self._advance(2) 4735 fn_syntax = True 4736 4737 func = self._parse_function_call( 4738 functions=functions, 4739 anonymous=anonymous, 4740 optional_parens=optional_parens, 4741 any_token=any_token, 4742 ) 4743 4744 if fn_syntax: 4745 self._match(TokenType.R_BRACE) 4746 4747 return func 4748 4749 def _parse_function_call( 4750 self, 4751 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4752 anonymous: bool = False, 4753 optional_parens: bool = True, 4754 any_token: bool = False, 4755 ) -> t.Optional[exp.Expression]: 4756 if not self._curr: 4757 return None 4758 4759 comments = self._curr.comments 4760 token_type = self._curr.token_type 4761 this = self._curr.text 4762 upper = this.upper() 4763 4764 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4765 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4766 self._advance() 4767 return self._parse_window(parser(self)) 4768 4769 if not self._next or self._next.token_type != TokenType.L_PAREN: 4770 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4771 self._advance() 4772 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4773 4774 return None 4775 4776 if any_token: 4777 if token_type in self.RESERVED_TOKENS: 4778 return None 4779 elif token_type not in self.FUNC_TOKENS: 4780 return None 4781 4782 self._advance(2) 4783 4784 parser = self.FUNCTION_PARSERS.get(upper) 4785 if parser and not anonymous: 4786 this = parser(self) 4787 else: 4788 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4789 4790 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4791 this = self.expression(subquery_predicate, this=self._parse_select()) 4792 self._match_r_paren() 4793 return this 4794 4795 if functions is None: 4796 functions = self.FUNCTIONS 4797 4798 function = functions.get(upper) 4799 4800 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4801 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4802 4803 if alias: 4804 args = self._kv_to_prop_eq(args) 4805 4806 if function and not anonymous: 4807 if "dialect" in function.__code__.co_varnames: 4808 func = function(args, dialect=self.dialect) 4809 else: 4810 func = function(args) 4811 4812 func = self.validate_expression(func, args) 4813 if not self.dialect.NORMALIZE_FUNCTIONS: 4814 func.meta["name"] = this 4815 4816 this = func 4817 else: 4818 if token_type == TokenType.IDENTIFIER: 4819 this = exp.Identifier(this=this, quoted=True) 4820 this = self.expression(exp.Anonymous, this=this, expressions=args) 4821 4822 if isinstance(this, exp.Expression): 4823 this.add_comments(comments) 4824 4825 self._match_r_paren(this) 4826 return self._parse_window(this) 4827 4828 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4829 transformed = [] 4830 4831 for e in expressions: 4832 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4833 if isinstance(e, exp.Alias): 4834 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4835 4836 if not isinstance(e, exp.PropertyEQ): 4837 e = self.expression( 4838 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4839 ) 4840 4841 
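# A key that was parsed as a Column is unwrapped to its underlying identifier
# below, so that the PropertyEQ key is a plain name rather than a column reference.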
if isinstance(e.this, exp.Column): 4842 e.this.replace(e.this.this) 4843 4844 transformed.append(e) 4845 4846 return transformed 4847 4848 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4849 return self._parse_column_def(self._parse_id_var()) 4850 4851 def _parse_user_defined_function( 4852 self, kind: t.Optional[TokenType] = None 4853 ) -> t.Optional[exp.Expression]: 4854 this = self._parse_id_var() 4855 4856 while self._match(TokenType.DOT): 4857 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4858 4859 if not self._match(TokenType.L_PAREN): 4860 return this 4861 4862 expressions = self._parse_csv(self._parse_function_parameter) 4863 self._match_r_paren() 4864 return self.expression( 4865 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4866 ) 4867 4868 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4869 literal = self._parse_primary() 4870 if literal: 4871 return self.expression(exp.Introducer, this=token.text, expression=literal) 4872 4873 return self.expression(exp.Identifier, this=token.text) 4874 4875 def _parse_session_parameter(self) -> exp.SessionParameter: 4876 kind = None 4877 this = self._parse_id_var() or self._parse_primary() 4878 4879 if this and self._match(TokenType.DOT): 4880 kind = this.name 4881 this = self._parse_var() or self._parse_primary() 4882 4883 return self.expression(exp.SessionParameter, this=this, kind=kind) 4884 4885 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 4886 return self._parse_id_var() 4887 4888 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4889 index = self._index 4890 4891 if self._match(TokenType.L_PAREN): 4892 expressions = t.cast( 4893 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 4894 ) 4895 4896 if not self._match(TokenType.R_PAREN): 4897 self._retreat(index) 4898 else: 4899 expressions = [self._parse_lambda_arg()] 4900 4901 if self._match_set(self.LAMBDAS): 4902 return self.LAMBDAS[self._prev.token_type](self, expressions) 4903 4904 self._retreat(index) 4905 4906 this: t.Optional[exp.Expression] 4907 4908 if self._match(TokenType.DISTINCT): 4909 this = self.expression( 4910 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 4911 ) 4912 else: 4913 this = self._parse_select_or_expression(alias=alias) 4914 4915 return self._parse_limit( 4916 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4917 ) 4918 4919 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4920 index = self._index 4921 if not self._match(TokenType.L_PAREN): 4922 return this 4923 4924 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 4925 # expr can be of both types 4926 if self._match_set(self.SELECT_START_TOKENS): 4927 self._retreat(index) 4928 return this 4929 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4930 self._match_r_paren() 4931 return self.expression(exp.Schema, this=this, expressions=args) 4932 4933 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4934 return self._parse_column_def(self._parse_field(any_token=True)) 4935 4936 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4937 # column defs are not really columns, they're identifiers 4938 if isinstance(this, exp.Column): 4939 this = this.this 4940 4941 kind = self._parse_types(schema=True) 4942 4943 if self._match_text_seq("FOR", "ORDINALITY"): 4944 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4945 4946 constraints: t.List[exp.Expression] = [] 4947 4948 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4949 ("ALIAS", "MATERIALIZED") 4950 ): 4951 persisted = self._prev.text.upper() == "MATERIALIZED" 4952 constraints.append( 4953 self.expression( 4954 exp.ComputedColumnConstraint, 4955 this=self._parse_assignment(), 4956 persisted=persisted or self._match_text_seq("PERSISTED"), 4957 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4958 ) 4959 ) 4960 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4961 self._match(TokenType.ALIAS) 4962 constraints.append( 4963 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4964 ) 4965 4966 while True: 4967 constraint = self._parse_column_constraint() 4968 if not constraint: 4969 break 4970 constraints.append(constraint) 4971 4972 if not kind and not constraints: 4973 return this 4974 4975 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4976 4977 def _parse_auto_increment( 4978 self, 4979 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4980 start = None 4981 increment = None 4982 4983 if self._match(TokenType.L_PAREN, advance=False): 4984 args = self._parse_wrapped_csv(self._parse_bitwise) 4985 start = seq_get(args, 0) 4986 increment = seq_get(args, 1) 4987 elif self._match_text_seq("START"): 4988 start = self._parse_bitwise() 4989 self._match_text_seq("INCREMENT") 4990 increment = self._parse_bitwise() 4991 4992 if start and increment: 4993 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4994 4995 return exp.AutoIncrementColumnConstraint() 4996 4997 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4998 if not self._match_text_seq("REFRESH"): 4999 self._retreat(self._index - 1) 5000 return None 5001 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5002 5003 def _parse_compress(self) -> exp.CompressColumnConstraint: 5004 if self._match(TokenType.L_PAREN, advance=False): 5005 return self.expression( 5006 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5007 ) 5008 5009 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5010 5011 def _parse_generated_as_identity( 5012 self, 5013 ) -> ( 5014 exp.GeneratedAsIdentityColumnConstraint 5015 | exp.ComputedColumnConstraint 5016 | exp.GeneratedAsRowColumnConstraint 5017 ): 5018 if self._match_text_seq("BY", "DEFAULT"): 5019 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5020 this = self.expression( 5021 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5022 ) 5023 else: 5024 self._match_text_seq("ALWAYS") 5025 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5026 5027 self._match(TokenType.ALIAS) 5028 5029 if self._match_text_seq("ROW"): 5030 start = self._match_text_seq("START") 5031 if not start: 5032 self._match(TokenType.END) 5033 hidden = self._match_text_seq("HIDDEN") 5034 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5035 5036 identity = self._match_text_seq("IDENTITY") 5037 5038 if self._match(TokenType.L_PAREN): 5039 if self._match(TokenType.START_WITH): 5040 this.set("start", self._parse_bitwise()) 5041 if self._match_text_seq("INCREMENT", "BY"): 5042 this.set("increment", self._parse_bitwise()) 5043 if self._match_text_seq("MINVALUE"): 5044 this.set("minvalue", self._parse_bitwise()) 5045 if self._match_text_seq("MAXVALUE"): 5046 this.set("maxvalue", self._parse_bitwise()) 5047 5048 if self._match_text_seq("CYCLE"): 5049 this.set("cycle", True) 5050 elif self._match_text_seq("NO", "CYCLE"): 5051 this.set("cycle", False) 5052 5053 if not identity: 5054 this.set("expression", self._parse_range()) 5055 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5056 args = self._parse_csv(self._parse_bitwise) 5057 this.set("start", seq_get(args, 0)) 5058 this.set("increment", seq_get(args, 1)) 5059 5060 self._match_r_paren() 5061 5062 return this 5063 5064 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5065 self._match_text_seq("LENGTH") 5066 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5067 5068 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5069 if self._match_text_seq("NULL"): 5070 return self.expression(exp.NotNullColumnConstraint) 5071 if self._match_text_seq("CASESPECIFIC"): 5072 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5073 if self._match_text_seq("FOR", "REPLICATION"): 5074 return self.expression(exp.NotForReplicationColumnConstraint) 5075 return None 5076 5077 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5078 if self._match(TokenType.CONSTRAINT): 5079 this = self._parse_id_var() 5080 else: 5081 this = None 5082 5083 if self._match_texts(self.CONSTRAINT_PARSERS): 5084 return self.expression( 5085 exp.ColumnConstraint, 5086 this=this, 5087 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5088 ) 5089 5090 return this 5091 5092 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5093 if not self._match(TokenType.CONSTRAINT): 5094 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5095 5096 return self.expression( 5097 exp.Constraint, 5098 this=self._parse_id_var(), 5099 expressions=self._parse_unnamed_constraints(), 5100 ) 5101 5102 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5103 constraints = [] 5104 while True: 5105 constraint = self._parse_unnamed_constraint() or self._parse_function() 5106 if not constraint: 5107 break 5108 constraints.append(constraint) 5109 5110 return constraints 5111 5112 def _parse_unnamed_constraint( 5113 self, constraints: t.Optional[t.Collection[str]] = None 5114 ) -> t.Optional[exp.Expression]: 5115 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5116 constraints or self.CONSTRAINT_PARSERS 5117 ): 5118 return None 5119 5120 constraint = self._prev.text.upper() 5121 if constraint not in self.CONSTRAINT_PARSERS: 5122 
self.raise_error(f"No parser found for schema constraint {constraint}.") 5123 5124 return self.CONSTRAINT_PARSERS[constraint](self) 5125 5126 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5127 self._match_text_seq("KEY") 5128 return self.expression( 5129 exp.UniqueColumnConstraint, 5130 this=self._parse_schema(self._parse_id_var(any_token=False)), 5131 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5132 on_conflict=self._parse_on_conflict(), 5133 ) 5134 5135 def _parse_key_constraint_options(self) -> t.List[str]: 5136 options = [] 5137 while True: 5138 if not self._curr: 5139 break 5140 5141 if self._match(TokenType.ON): 5142 action = None 5143 on = self._advance_any() and self._prev.text 5144 5145 if self._match_text_seq("NO", "ACTION"): 5146 action = "NO ACTION" 5147 elif self._match_text_seq("CASCADE"): 5148 action = "CASCADE" 5149 elif self._match_text_seq("RESTRICT"): 5150 action = "RESTRICT" 5151 elif self._match_pair(TokenType.SET, TokenType.NULL): 5152 action = "SET NULL" 5153 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5154 action = "SET DEFAULT" 5155 else: 5156 self.raise_error("Invalid key constraint") 5157 5158 options.append(f"ON {on} {action}") 5159 elif self._match_text_seq("NOT", "ENFORCED"): 5160 options.append("NOT ENFORCED") 5161 elif self._match_text_seq("DEFERRABLE"): 5162 options.append("DEFERRABLE") 5163 elif self._match_text_seq("INITIALLY", "DEFERRED"): 5164 options.append("INITIALLY DEFERRED") 5165 elif self._match_text_seq("NORELY"): 5166 options.append("NORELY") 5167 elif self._match_text_seq("MATCH", "FULL"): 5168 options.append("MATCH FULL") 5169 else: 5170 break 5171 5172 return options 5173 5174 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5175 if match and not self._match(TokenType.REFERENCES): 5176 return None 5177 5178 expressions = None 5179 this = self._parse_table(schema=True) 5180 options = self._parse_key_constraint_options() 5181 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5182 5183 def _parse_foreign_key(self) -> exp.ForeignKey: 5184 expressions = self._parse_wrapped_id_vars() 5185 reference = self._parse_references() 5186 options = {} 5187 5188 while self._match(TokenType.ON): 5189 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5190 self.raise_error("Expected DELETE or UPDATE") 5191 5192 kind = self._prev.text.lower() 5193 5194 if self._match_text_seq("NO", "ACTION"): 5195 action = "NO ACTION" 5196 elif self._match(TokenType.SET): 5197 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5198 action = "SET " + self._prev.text.upper() 5199 else: 5200 self._advance() 5201 action = self._prev.text.upper() 5202 5203 options[kind] = action 5204 5205 return self.expression( 5206 exp.ForeignKey, 5207 expressions=expressions, 5208 reference=reference, 5209 **options, # type: ignore 5210 ) 5211 5212 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5213 return self._parse_field() 5214 5215 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5216 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5217 self._retreat(self._index - 1) 5218 return None 5219 5220 id_vars = self._parse_wrapped_id_vars() 5221 return self.expression( 5222 exp.PeriodForSystemTimeConstraint, 5223 this=seq_get(id_vars, 0), 5224 expression=seq_get(id_vars, 1), 5225 ) 5226 5227 def _parse_primary_key( 5228 self, wrapped_optional: bool = False, in_props: bool = False 5229 ) -> 
exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5230 desc = ( 5231 self._match_set((TokenType.ASC, TokenType.DESC)) 5232 and self._prev.token_type == TokenType.DESC 5233 ) 5234 5235 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5236 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5237 5238 expressions = self._parse_wrapped_csv( 5239 self._parse_primary_key_part, optional=wrapped_optional 5240 ) 5241 options = self._parse_key_constraint_options() 5242 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5243 5244 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5245 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5246 5247 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5248 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5249 return this 5250 5251 bracket_kind = self._prev.token_type 5252 expressions = self._parse_csv( 5253 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5254 ) 5255 5256 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5257 self.raise_error("Expected ]") 5258 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5259 self.raise_error("Expected }") 5260 5261 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5262 if bracket_kind == TokenType.L_BRACE: 5263 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5264 elif not this: 5265 this = self.expression(exp.Array, expressions=expressions) 5266 else: 5267 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5268 if constructor_type: 5269 return self.expression(constructor_type, expressions=expressions) 5270 5271 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5272 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5273 5274 self._add_comments(this) 5275 return self._parse_bracket(this) 5276 5277 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5278 if self._match(TokenType.COLON): 5279 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5280 return this 5281 5282 def _parse_case(self) -> t.Optional[exp.Expression]: 5283 ifs = [] 5284 default = None 5285 5286 comments = self._prev_comments 5287 expression = self._parse_assignment() 5288 5289 while self._match(TokenType.WHEN): 5290 this = self._parse_assignment() 5291 self._match(TokenType.THEN) 5292 then = self._parse_assignment() 5293 ifs.append(self.expression(exp.If, this=this, true=then)) 5294 5295 if self._match(TokenType.ELSE): 5296 default = self._parse_assignment() 5297 5298 if not self._match(TokenType.END): 5299 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5300 default = exp.column("interval") 5301 else: 5302 self.raise_error("Expected END after CASE", self._prev) 5303 5304 return self.expression( 5305 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5306 ) 5307 5308 def _parse_if(self) -> t.Optional[exp.Expression]: 5309 if self._match(TokenType.L_PAREN): 5310 args = self._parse_csv(self._parse_assignment) 5311 this = self.validate_expression(exp.If.from_arg_list(args), args) 5312 self._match_r_paren() 5313 else: 5314 index = self._index - 1 5315 5316 if self.NO_PAREN_IF_COMMANDS and index == 0: 5317 return 
self._parse_as_command(self._prev) 5318 5319 condition = self._parse_assignment() 5320 5321 if not condition: 5322 self._retreat(index) 5323 return None 5324 5325 self._match(TokenType.THEN) 5326 true = self._parse_assignment() 5327 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5328 self._match(TokenType.END) 5329 this = self.expression(exp.If, this=condition, true=true, false=false) 5330 5331 return this 5332 5333 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5334 if not self._match_text_seq("VALUE", "FOR"): 5335 self._retreat(self._index - 1) 5336 return None 5337 5338 return self.expression( 5339 exp.NextValueFor, 5340 this=self._parse_column(), 5341 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5342 ) 5343 5344 def _parse_extract(self) -> exp.Extract: 5345 this = self._parse_function() or self._parse_var_or_string(upper=True) 5346 5347 if self._match(TokenType.FROM): 5348 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5349 5350 if not self._match(TokenType.COMMA): 5351 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5352 5353 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5354 5355 def _parse_gap_fill(self) -> exp.GapFill: 5356 self._match(TokenType.TABLE) 5357 this = self._parse_table() 5358 5359 self._match(TokenType.COMMA) 5360 args = [this, *self._parse_csv(self._parse_lambda)] 5361 5362 gap_fill = exp.GapFill.from_arg_list(args) 5363 return self.validate_expression(gap_fill, args) 5364 5365 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5366 this = self._parse_assignment() 5367 5368 if not self._match(TokenType.ALIAS): 5369 if self._match(TokenType.COMMA): 5370 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5371 5372 self.raise_error("Expected AS after CAST") 5373 5374 fmt = None 5375 to = self._parse_types() 5376 5377 if self._match(TokenType.FORMAT): 5378 fmt_string = self._parse_string() 5379 fmt = self._parse_at_time_zone(fmt_string) 5380 5381 if not to: 5382 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5383 if to.this in exp.DataType.TEMPORAL_TYPES: 5384 this = self.expression( 5385 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5386 this=this, 5387 format=exp.Literal.string( 5388 format_time( 5389 fmt_string.this if fmt_string else "", 5390 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5391 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5392 ) 5393 ), 5394 safe=safe, 5395 ) 5396 5397 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5398 this.set("zone", fmt.args["zone"]) 5399 return this 5400 elif not to: 5401 self.raise_error("Expected TYPE after CAST") 5402 elif isinstance(to, exp.Identifier): 5403 to = exp.DataType.build(to.name, udt=True) 5404 elif to.this == exp.DataType.Type.CHAR: 5405 if self._match(TokenType.CHARACTER_SET): 5406 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5407 5408 return self.expression( 5409 exp.Cast if strict else exp.TryCast, 5410 this=this, 5411 to=to, 5412 format=fmt, 5413 safe=safe, 5414 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5415 ) 5416 5417 def _parse_string_agg(self) -> exp.Expression: 5418 if self._match(TokenType.DISTINCT): 5419 args: t.List[t.Optional[exp.Expression]] = [ 5420 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 5421 ] 5422 if 
self._match(TokenType.COMMA): 5423 args.extend(self._parse_csv(self._parse_assignment)) 5424 else: 5425 args = self._parse_csv(self._parse_assignment) # type: ignore 5426 5427 index = self._index 5428 if not self._match(TokenType.R_PAREN) and args: 5429 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5430 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5431 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5432 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5433 5434 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5435 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5436 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5437 if not self._match_text_seq("WITHIN", "GROUP"): 5438 self._retreat(index) 5439 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5440 5441 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5442 order = self._parse_order(this=seq_get(args, 0)) 5443 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5444 5445 def _parse_convert( 5446 self, strict: bool, safe: t.Optional[bool] = None 5447 ) -> t.Optional[exp.Expression]: 5448 this = self._parse_bitwise() 5449 5450 if self._match(TokenType.USING): 5451 to: t.Optional[exp.Expression] = self.expression( 5452 exp.CharacterSet, this=self._parse_var() 5453 ) 5454 elif self._match(TokenType.COMMA): 5455 to = self._parse_types() 5456 else: 5457 to = None 5458 5459 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5460 5461 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5462 """ 5463 There are generally two variants of the DECODE function: 5464 5465 - DECODE(bin, charset) 5466 - DECODE(expression, search, result [, search, result] ... [, default]) 5467 5468 The second variant will always be parsed into a CASE expression. Note that NULL 5469 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5470 instead of relying on pattern matching. 
5471 """ 5472 args = self._parse_csv(self._parse_assignment) 5473 5474 if len(args) < 3: 5475 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5476 5477 expression, *expressions = args 5478 if not expression: 5479 return None 5480 5481 ifs = [] 5482 for search, result in zip(expressions[::2], expressions[1::2]): 5483 if not search or not result: 5484 return None 5485 5486 if isinstance(search, exp.Literal): 5487 ifs.append( 5488 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5489 ) 5490 elif isinstance(search, exp.Null): 5491 ifs.append( 5492 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5493 ) 5494 else: 5495 cond = exp.or_( 5496 exp.EQ(this=expression.copy(), expression=search), 5497 exp.and_( 5498 exp.Is(this=expression.copy(), expression=exp.Null()), 5499 exp.Is(this=search.copy(), expression=exp.Null()), 5500 copy=False, 5501 ), 5502 copy=False, 5503 ) 5504 ifs.append(exp.If(this=cond, true=result)) 5505 5506 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5507 5508 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5509 self._match_text_seq("KEY") 5510 key = self._parse_column() 5511 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5512 self._match_text_seq("VALUE") 5513 value = self._parse_bitwise() 5514 5515 if not key and not value: 5516 return None 5517 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5518 5519 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5520 if not this or not self._match_text_seq("FORMAT", "JSON"): 5521 return this 5522 5523 return self.expression(exp.FormatJson, this=this) 5524 5525 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5526 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5527 for value in values: 5528 if self._match_text_seq(value, "ON", on): 5529 return f"{value} ON {on}" 5530 5531 return None 5532 5533 @t.overload 5534 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5535 5536 @t.overload 5537 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5538 5539 def _parse_json_object(self, agg=False): 5540 star = self._parse_star() 5541 expressions = ( 5542 [star] 5543 if star 5544 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5545 ) 5546 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5547 5548 unique_keys = None 5549 if self._match_text_seq("WITH", "UNIQUE"): 5550 unique_keys = True 5551 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5552 unique_keys = False 5553 5554 self._match_text_seq("KEYS") 5555 5556 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5557 self._parse_type() 5558 ) 5559 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5560 5561 return self.expression( 5562 exp.JSONObjectAgg if agg else exp.JSONObject, 5563 expressions=expressions, 5564 null_handling=null_handling, 5565 unique_keys=unique_keys, 5566 return_type=return_type, 5567 encoding=encoding, 5568 ) 5569 5570 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5571 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5572 if not self._match_text_seq("NESTED"): 5573 this = self._parse_id_var() 5574 kind = self._parse_types(allow_identifiers=False) 5575 nested = None 5576 else: 5577 this = None 5578 kind = None 5579 nested = True 5580 5581 path = self._match_text_seq("PATH") and self._parse_string() 5582 nested_schema = nested and self._parse_json_schema() 5583 5584 return self.expression( 5585 exp.JSONColumnDef, 5586 this=this, 5587 kind=kind, 5588 path=path, 5589 nested_schema=nested_schema, 5590 ) 5591 5592 def _parse_json_schema(self) -> exp.JSONSchema: 5593 self._match_text_seq("COLUMNS") 5594 return self.expression( 5595 exp.JSONSchema, 5596 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5597 ) 5598 5599 def _parse_json_table(self) -> exp.JSONTable: 5600 this = self._parse_format_json(self._parse_bitwise()) 5601 path = self._match(TokenType.COMMA) and self._parse_string() 5602 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5603 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5604 schema = self._parse_json_schema() 5605 5606 return exp.JSONTable( 5607 this=this, 5608 schema=schema, 5609 path=path, 5610 error_handling=error_handling, 5611 empty_handling=empty_handling, 5612 ) 5613 5614 def _parse_match_against(self) -> exp.MatchAgainst: 5615 expressions = self._parse_csv(self._parse_column) 5616 5617 self._match_text_seq(")", "AGAINST", "(") 5618 5619 this = self._parse_string() 5620 5621 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5622 modifier = "IN NATURAL LANGUAGE MODE" 5623 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5624 modifier = f"{modifier} WITH QUERY EXPANSION" 5625 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5626 modifier = "IN BOOLEAN MODE" 5627 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5628 modifier = "WITH QUERY EXPANSION" 5629 else: 5630 modifier = None 5631 5632 return self.expression( 5633 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5634 ) 5635 5636 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5637 def _parse_open_json(self) -> exp.OpenJSON: 5638 this = self._parse_bitwise() 5639 path = self._match(TokenType.COMMA) and self._parse_string() 5640 5641 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5642 this = self._parse_field(any_token=True) 5643 kind = self._parse_types() 5644 path = 
self._parse_string() 5645 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5646 5647 return self.expression( 5648 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5649 ) 5650 5651 expressions = None 5652 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5653 self._match_l_paren() 5654 expressions = self._parse_csv(_parse_open_json_column_def) 5655 5656 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5657 5658 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5659 args = self._parse_csv(self._parse_bitwise) 5660 5661 if self._match(TokenType.IN): 5662 return self.expression( 5663 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5664 ) 5665 5666 if haystack_first: 5667 haystack = seq_get(args, 0) 5668 needle = seq_get(args, 1) 5669 else: 5670 needle = seq_get(args, 0) 5671 haystack = seq_get(args, 1) 5672 5673 return self.expression( 5674 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5675 ) 5676 5677 def _parse_predict(self) -> exp.Predict: 5678 self._match_text_seq("MODEL") 5679 this = self._parse_table() 5680 5681 self._match(TokenType.COMMA) 5682 self._match_text_seq("TABLE") 5683 5684 return self.expression( 5685 exp.Predict, 5686 this=this, 5687 expression=self._parse_table(), 5688 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5689 ) 5690 5691 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5692 args = self._parse_csv(self._parse_table) 5693 return exp.JoinHint(this=func_name.upper(), expressions=args) 5694 5695 def _parse_substring(self) -> exp.Substring: 5696 # Postgres supports the form: substring(string [from int] [for int]) 5697 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5698 5699 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5700 5701 if self._match(TokenType.FROM): 5702 args.append(self._parse_bitwise()) 5703 if self._match(TokenType.FOR): 5704 if len(args) == 1: 5705 args.append(exp.Literal.number(1)) 5706 args.append(self._parse_bitwise()) 5707 5708 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5709 5710 def _parse_trim(self) -> exp.Trim: 5711 # https://www.w3resource.com/sql/character-functions/trim.php 5712 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5713 5714 position = None 5715 collation = None 5716 expression = None 5717 5718 if self._match_texts(self.TRIM_TYPES): 5719 position = self._prev.text.upper() 5720 5721 this = self._parse_bitwise() 5722 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5723 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5724 expression = self._parse_bitwise() 5725 5726 if invert_order: 5727 this, expression = expression, this 5728 5729 if self._match(TokenType.COLLATE): 5730 collation = self._parse_bitwise() 5731 5732 return self.expression( 5733 exp.Trim, this=this, position=position, expression=expression, collation=collation 5734 ) 5735 5736 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5737 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5738 5739 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5740 return self._parse_window(self._parse_id_var(), alias=True) 5741 5742 def _parse_respect_or_ignore_nulls( 5743 self, this: t.Optional[exp.Expression] 5744 ) -> t.Optional[exp.Expression]: 5745 if self._match_text_seq("IGNORE", "NULLS"): 
5746 return self.expression(exp.IgnoreNulls, this=this) 5747 if self._match_text_seq("RESPECT", "NULLS"): 5748 return self.expression(exp.RespectNulls, this=this) 5749 return this 5750 5751 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5752 if self._match(TokenType.HAVING): 5753 self._match_texts(("MAX", "MIN")) 5754 max = self._prev.text.upper() != "MIN" 5755 return self.expression( 5756 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5757 ) 5758 5759 return this 5760 5761 def _parse_window( 5762 self, this: t.Optional[exp.Expression], alias: bool = False 5763 ) -> t.Optional[exp.Expression]: 5764 func = this 5765 comments = func.comments if isinstance(func, exp.Expression) else None 5766 5767 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5768 self._match(TokenType.WHERE) 5769 this = self.expression( 5770 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5771 ) 5772 self._match_r_paren() 5773 5774 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5775 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5776 if self._match_text_seq("WITHIN", "GROUP"): 5777 order = self._parse_wrapped(self._parse_order) 5778 this = self.expression(exp.WithinGroup, this=this, expression=order) 5779 5780 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5781 # Some dialects choose to implement and some do not. 5782 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5783 5784 # There is some code above in _parse_lambda that handles 5785 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5786 5787 # The below changes handle 5788 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5789 5790 # Oracle allows both formats 5791 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5792 # and Snowflake chose to do the same for familiarity 5793 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5794 if isinstance(this, exp.AggFunc): 5795 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5796 5797 if ignore_respect and ignore_respect is not this: 5798 ignore_respect.replace(ignore_respect.this) 5799 this = self.expression(ignore_respect.__class__, this=this) 5800 5801 this = self._parse_respect_or_ignore_nulls(this) 5802 5803 # bigquery select from window x AS (partition by ...) 
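# Illustrative sketch (not part of the source): per the comments above, both
# placements should normalize to the same tree, with exp.IgnoreNulls lifted to
# wrap the aggregate call before the OVER clause is attached:
#
#     import sqlglot
#     inside = sqlglot.parse_one("SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t", read="bigquery")
#     outside = sqlglot.parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t")
#     # both trees should contain IgnoreNulls(this=FirstValue(...)) under the Window node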
5804 if alias: 5805 over = None 5806 self._match(TokenType.ALIAS) 5807 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5808 return this 5809 else: 5810 over = self._prev.text.upper() 5811 5812 if comments and isinstance(func, exp.Expression): 5813 func.pop_comments() 5814 5815 if not self._match(TokenType.L_PAREN): 5816 return self.expression( 5817 exp.Window, 5818 comments=comments, 5819 this=this, 5820 alias=self._parse_id_var(False), 5821 over=over, 5822 ) 5823 5824 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5825 5826 first = self._match(TokenType.FIRST) 5827 if self._match_text_seq("LAST"): 5828 first = False 5829 5830 partition, order = self._parse_partition_and_order() 5831 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5832 5833 if kind: 5834 self._match(TokenType.BETWEEN) 5835 start = self._parse_window_spec() 5836 self._match(TokenType.AND) 5837 end = self._parse_window_spec() 5838 5839 spec = self.expression( 5840 exp.WindowSpec, 5841 kind=kind, 5842 start=start["value"], 5843 start_side=start["side"], 5844 end=end["value"], 5845 end_side=end["side"], 5846 ) 5847 else: 5848 spec = None 5849 5850 self._match_r_paren() 5851 5852 window = self.expression( 5853 exp.Window, 5854 comments=comments, 5855 this=this, 5856 partition_by=partition, 5857 order=order, 5858 spec=spec, 5859 alias=window_alias, 5860 over=over, 5861 first=first, 5862 ) 5863 5864 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5865 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5866 return self._parse_window(window, alias=alias) 5867 5868 return window 5869 5870 def _parse_partition_and_order( 5871 self, 5872 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5873 return self._parse_partition_by(), self._parse_order() 5874 5875 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5876 self._match(TokenType.BETWEEN) 5877 5878 return { 5879 "value": ( 5880 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5881 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5882 or self._parse_bitwise() 5883 ), 5884 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5885 } 5886 5887 def _parse_alias( 5888 self, this: t.Optional[exp.Expression], explicit: bool = False 5889 ) -> t.Optional[exp.Expression]: 5890 any_token = self._match(TokenType.ALIAS) 5891 comments = self._prev_comments or [] 5892 5893 if explicit and not any_token: 5894 return this 5895 5896 if self._match(TokenType.L_PAREN): 5897 aliases = self.expression( 5898 exp.Aliases, 5899 comments=comments, 5900 this=this, 5901 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5902 ) 5903 self._match_r_paren(aliases) 5904 return aliases 5905 5906 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5907 self.STRING_ALIASES and self._parse_string_as_identifier() 5908 ) 5909 5910 if alias: 5911 comments.extend(alias.pop_comments()) 5912 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5913 column = this.this 5914 5915 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5916 if not this.comments and column and column.comments: 5917 this.comments = column.pop_comments() 5918 5919 return this 5920 5921 def _parse_id_var( 5922 self, 5923 any_token: bool = True, 5924 tokens: t.Optional[t.Collection[TokenType]] = None, 5925 ) -> t.Optional[exp.Expression]: 5926 expression = self._parse_identifier() 5927 if 
not expression and ( 5928 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5929 ): 5930 quoted = self._prev.token_type == TokenType.STRING 5931 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5932 5933 return expression 5934 5935 def _parse_string(self) -> t.Optional[exp.Expression]: 5936 if self._match_set(self.STRING_PARSERS): 5937 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5938 return self._parse_placeholder() 5939 5940 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5941 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5942 5943 def _parse_number(self) -> t.Optional[exp.Expression]: 5944 if self._match_set(self.NUMERIC_PARSERS): 5945 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5946 return self._parse_placeholder() 5947 5948 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5949 if self._match(TokenType.IDENTIFIER): 5950 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5951 return self._parse_placeholder() 5952 5953 def _parse_var( 5954 self, 5955 any_token: bool = False, 5956 tokens: t.Optional[t.Collection[TokenType]] = None, 5957 upper: bool = False, 5958 ) -> t.Optional[exp.Expression]: 5959 if ( 5960 (any_token and self._advance_any()) 5961 or self._match(TokenType.VAR) 5962 or (self._match_set(tokens) if tokens else False) 5963 ): 5964 return self.expression( 5965 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5966 ) 5967 return self._parse_placeholder() 5968 5969 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5970 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5971 self._advance() 5972 return self._prev 5973 return None 5974 5975 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 5976 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 5977 5978 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5979 return self._parse_primary() or self._parse_var(any_token=True) 5980 5981 def _parse_null(self) -> t.Optional[exp.Expression]: 5982 if self._match_set(self.NULL_TOKENS): 5983 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5984 return self._parse_placeholder() 5985 5986 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5987 if self._match(TokenType.TRUE): 5988 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5989 if self._match(TokenType.FALSE): 5990 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5991 return self._parse_placeholder() 5992 5993 def _parse_star(self) -> t.Optional[exp.Expression]: 5994 if self._match(TokenType.STAR): 5995 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5996 return self._parse_placeholder() 5997 5998 def _parse_parameter(self) -> exp.Parameter: 5999 this = self._parse_identifier() or self._parse_primary_or_var() 6000 return self.expression(exp.Parameter, this=this) 6001 6002 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6003 if self._match_set(self.PLACEHOLDER_PARSERS): 6004 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6005 if placeholder: 6006 return placeholder 6007 self._advance(-1) 6008 return None 6009 6010 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6011 if not self._match_texts(keywords): 6012 return None 6013 if self._match(TokenType.L_PAREN, 
advance=False): 6014 return self._parse_wrapped_csv(self._parse_expression) 6015 6016 expression = self._parse_expression() 6017 return [expression] if expression else None 6018 6019 def _parse_csv( 6020 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6021 ) -> t.List[exp.Expression]: 6022 parse_result = parse_method() 6023 items = [parse_result] if parse_result is not None else [] 6024 6025 while self._match(sep): 6026 self._add_comments(parse_result) 6027 parse_result = parse_method() 6028 if parse_result is not None: 6029 items.append(parse_result) 6030 6031 return items 6032 6033 def _parse_tokens( 6034 self, parse_method: t.Callable, expressions: t.Dict 6035 ) -> t.Optional[exp.Expression]: 6036 this = parse_method() 6037 6038 while self._match_set(expressions): 6039 this = self.expression( 6040 expressions[self._prev.token_type], 6041 this=this, 6042 comments=self._prev_comments, 6043 expression=parse_method(), 6044 ) 6045 6046 return this 6047 6048 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6049 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6050 6051 def _parse_wrapped_csv( 6052 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6053 ) -> t.List[exp.Expression]: 6054 return self._parse_wrapped( 6055 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6056 ) 6057 6058 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6059 wrapped = self._match(TokenType.L_PAREN) 6060 if not wrapped and not optional: 6061 self.raise_error("Expecting (") 6062 parse_result = parse_method() 6063 if wrapped: 6064 self._match_r_paren() 6065 return parse_result 6066 6067 def _parse_expressions(self) -> t.List[exp.Expression]: 6068 return self._parse_csv(self._parse_expression) 6069 6070 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6071 return self._parse_select() or self._parse_set_operations( 6072 self._parse_expression() if alias else self._parse_assignment() 6073 ) 6074 6075 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6076 return self._parse_query_modifiers( 6077 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6078 ) 6079 6080 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6081 this = None 6082 if self._match_texts(self.TRANSACTION_KIND): 6083 this = self._prev.text 6084 6085 self._match_texts(("TRANSACTION", "WORK")) 6086 6087 modes = [] 6088 while True: 6089 mode = [] 6090 while self._match(TokenType.VAR): 6091 mode.append(self._prev.text) 6092 6093 if mode: 6094 modes.append(" ".join(mode)) 6095 if not self._match(TokenType.COMMA): 6096 break 6097 6098 return self.expression(exp.Transaction, this=this, modes=modes) 6099 6100 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6101 chain = None 6102 savepoint = None 6103 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6104 6105 self._match_texts(("TRANSACTION", "WORK")) 6106 6107 if self._match_text_seq("TO"): 6108 self._match_text_seq("SAVEPOINT") 6109 savepoint = self._parse_id_var() 6110 6111 if self._match(TokenType.AND): 6112 chain = not self._match_text_seq("NO") 6113 self._match_text_seq("CHAIN") 6114 6115 if is_rollback: 6116 return self.expression(exp.Rollback, savepoint=savepoint) 6117 6118 return self.expression(exp.Commit, chain=chain) 6119 6120 def _parse_refresh(self) -> exp.Refresh: 6121 self._match(TokenType.TABLE) 6122 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6123 6124 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6125 if not self._match_text_seq("ADD"): 6126 return None 6127 6128 self._match(TokenType.COLUMN) 6129 exists_column = self._parse_exists(not_=True) 6130 expression = self._parse_field_def() 6131 6132 if expression: 6133 expression.set("exists", exists_column) 6134 6135 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6136 if self._match_texts(("FIRST", "AFTER")): 6137 position = self._prev.text 6138 column_position = self.expression( 6139 exp.ColumnPosition, this=self._parse_column(), position=position 6140 ) 6141 expression.set("position", column_position) 6142 6143 return expression 6144 6145 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6146 drop = self._match(TokenType.DROP) and self._parse_drop() 6147 if drop and not isinstance(drop, exp.Command): 6148 drop.set("kind", drop.args.get("kind", "COLUMN")) 6149 return drop 6150 6151 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6152 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6153 return self.expression( 6154 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6155 ) 6156 6157 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6158 index = self._index - 1 6159 6160 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6161 return self._parse_csv( 6162 lambda: self.expression( 6163 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6164 ) 6165 ) 6166 6167 self._retreat(index) 6168 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6169 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6170 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6171 6172 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6173 if self._match_texts(self.ALTER_ALTER_PARSERS): 6174 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6175 6176 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6177 # keyword after ALTER we default to parsing this statement 6178 self._match(TokenType.COLUMN) 6179 column = self._parse_field(any_token=True) 6180 6181 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6182 return self.expression(exp.AlterColumn, this=column, drop=True) 6183 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6184 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6185 if self._match(TokenType.COMMENT): 6186 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6187 if self._match_text_seq("DROP", "NOT", "NULL"): 6188 return self.expression( 6189 exp.AlterColumn, 6190 this=column, 6191 drop=True, 6192 allow_null=True, 6193 ) 6194 if self._match_text_seq("SET", "NOT", "NULL"): 6195 return self.expression( 6196 exp.AlterColumn, 6197 this=column, 6198 allow_null=False, 6199 ) 6200 self._match_text_seq("SET", "DATA") 6201 self._match_text_seq("TYPE") 6202 return self.expression( 6203 exp.AlterColumn, 6204 this=column, 6205 dtype=self._parse_types(), 6206 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6207 using=self._match(TokenType.USING) and self._parse_assignment(), 6208 ) 6209 6210 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6211 if self._match_texts(("ALL", "EVEN", "AUTO")): 6212 
return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6213 6214 self._match_text_seq("KEY", "DISTKEY") 6215 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6216 6217 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6218 if compound: 6219 self._match_text_seq("SORTKEY") 6220 6221 if self._match(TokenType.L_PAREN, advance=False): 6222 return self.expression( 6223 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6224 ) 6225 6226 self._match_texts(("AUTO", "NONE")) 6227 return self.expression( 6228 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6229 ) 6230 6231 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6232 index = self._index - 1 6233 6234 partition_exists = self._parse_exists() 6235 if self._match(TokenType.PARTITION, advance=False): 6236 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6237 6238 self._retreat(index) 6239 return self._parse_csv(self._parse_drop_column) 6240 6241 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6242 if self._match(TokenType.COLUMN): 6243 exists = self._parse_exists() 6244 old_column = self._parse_column() 6245 to = self._match_text_seq("TO") 6246 new_column = self._parse_column() 6247 6248 if old_column is None or to is None or new_column is None: 6249 return None 6250 6251 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6252 6253 self._match_text_seq("TO") 6254 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6255 6256 def _parse_alter_table_set(self) -> exp.AlterSet: 6257 alter_set = self.expression(exp.AlterSet) 6258 6259 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6260 "TABLE", "PROPERTIES" 6261 ): 6262 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6263 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6264 alter_set.set("expressions", [self._parse_assignment()]) 6265 elif self._match_texts(("LOGGED", "UNLOGGED")): 6266 alter_set.set("option", exp.var(self._prev.text.upper())) 6267 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6268 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6269 elif self._match_text_seq("LOCATION"): 6270 alter_set.set("location", self._parse_field()) 6271 elif self._match_text_seq("ACCESS", "METHOD"): 6272 alter_set.set("access_method", self._parse_field()) 6273 elif self._match_text_seq("TABLESPACE"): 6274 alter_set.set("tablespace", self._parse_field()) 6275 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6276 alter_set.set("file_format", [self._parse_field()]) 6277 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6278 alter_set.set("file_format", self._parse_wrapped_options()) 6279 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6280 alter_set.set("copy_options", self._parse_wrapped_options()) 6281 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6282 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6283 else: 6284 if self._match_text_seq("SERDE"): 6285 alter_set.set("serde", self._parse_field()) 6286 6287 alter_set.set("expressions", [self._parse_properties()]) 6288 6289 return alter_set 6290 6291 def _parse_alter(self) -> exp.AlterTable | exp.Command: 6292 start = self._prev 6293 6294 if not self._match(TokenType.TABLE): 6295 return 
self._parse_as_command(start) 6296 6297 exists = self._parse_exists() 6298 only = self._match_text_seq("ONLY") 6299 this = self._parse_table(schema=True) 6300 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6301 6302 if self._next: 6303 self._advance() 6304 6305 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6306 if parser: 6307 actions = ensure_list(parser(self)) 6308 options = self._parse_csv(self._parse_property) 6309 6310 if not self._curr and actions: 6311 return self.expression( 6312 exp.AlterTable, 6313 this=this, 6314 exists=exists, 6315 actions=actions, 6316 only=only, 6317 options=options, 6318 cluster=cluster, 6319 ) 6320 6321 return self._parse_as_command(start) 6322 6323 def _parse_merge(self) -> exp.Merge: 6324 self._match(TokenType.INTO) 6325 target = self._parse_table() 6326 6327 if target and self._match(TokenType.ALIAS, advance=False): 6328 target.set("alias", self._parse_table_alias()) 6329 6330 self._match(TokenType.USING) 6331 using = self._parse_table() 6332 6333 self._match(TokenType.ON) 6334 on = self._parse_assignment() 6335 6336 return self.expression( 6337 exp.Merge, 6338 this=target, 6339 using=using, 6340 on=on, 6341 expressions=self._parse_when_matched(), 6342 ) 6343 6344 def _parse_when_matched(self) -> t.List[exp.When]: 6345 whens = [] 6346 6347 while self._match(TokenType.WHEN): 6348 matched = not self._match(TokenType.NOT) 6349 self._match_text_seq("MATCHED") 6350 source = ( 6351 False 6352 if self._match_text_seq("BY", "TARGET") 6353 else self._match_text_seq("BY", "SOURCE") 6354 ) 6355 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6356 6357 self._match(TokenType.THEN) 6358 6359 if self._match(TokenType.INSERT): 6360 _this = self._parse_star() 6361 if _this: 6362 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6363 else: 6364 then = self.expression( 6365 exp.Insert, 6366 this=self._parse_value(), 6367 expression=self._match_text_seq("VALUES") and self._parse_value(), 6368 ) 6369 elif self._match(TokenType.UPDATE): 6370 expressions = self._parse_star() 6371 if expressions: 6372 then = self.expression(exp.Update, expressions=expressions) 6373 else: 6374 then = self.expression( 6375 exp.Update, 6376 expressions=self._match(TokenType.SET) 6377 and self._parse_csv(self._parse_equality), 6378 ) 6379 elif self._match(TokenType.DELETE): 6380 then = self.expression(exp.Var, this=self._prev.text) 6381 else: 6382 then = None 6383 6384 whens.append( 6385 self.expression( 6386 exp.When, 6387 matched=matched, 6388 source=source, 6389 condition=condition, 6390 then=then, 6391 ) 6392 ) 6393 return whens 6394 6395 def _parse_show(self) -> t.Optional[exp.Expression]: 6396 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6397 if parser: 6398 return parser(self) 6399 return self._parse_as_command(self._prev) 6400 6401 def _parse_set_item_assignment( 6402 self, kind: t.Optional[str] = None 6403 ) -> t.Optional[exp.Expression]: 6404 index = self._index 6405 6406 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6407 return self._parse_set_transaction(global_=kind == "GLOBAL") 6408 6409 left = self._parse_primary() or self._parse_column() 6410 assignment_delimiter = self._match_texts(("=", "TO")) 6411 6412 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6413 self._retreat(index) 6414 return None 6415 6416 right = self._parse_statement() or self._parse_id_var() 6417 if isinstance(right, 
(exp.Column, exp.Identifier)): 6418 right = exp.var(right.name) 6419 6420 this = self.expression(exp.EQ, this=left, expression=right) 6421 return self.expression(exp.SetItem, this=this, kind=kind) 6422 6423 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6424 self._match_text_seq("TRANSACTION") 6425 characteristics = self._parse_csv( 6426 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6427 ) 6428 return self.expression( 6429 exp.SetItem, 6430 expressions=characteristics, 6431 kind="TRANSACTION", 6432 **{"global": global_}, # type: ignore 6433 ) 6434 6435 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6436 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6437 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6438 6439 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6440 index = self._index 6441 set_ = self.expression( 6442 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6443 ) 6444 6445 if self._curr: 6446 self._retreat(index) 6447 return self._parse_as_command(self._prev) 6448 6449 return set_ 6450 6451 def _parse_var_from_options( 6452 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6453 ) -> t.Optional[exp.Var]: 6454 start = self._curr 6455 if not start: 6456 return None 6457 6458 option = start.text.upper() 6459 continuations = options.get(option) 6460 6461 index = self._index 6462 self._advance() 6463 for keywords in continuations or []: 6464 if isinstance(keywords, str): 6465 keywords = (keywords,) 6466 6467 if self._match_text_seq(*keywords): 6468 option = f"{option} {' '.join(keywords)}" 6469 break 6470 else: 6471 if continuations or continuations is None: 6472 if raise_unmatched: 6473 self.raise_error(f"Unknown option {option}") 6474 6475 self._retreat(index) 6476 return None 6477 6478 return exp.var(option) 6479 6480 def _parse_as_command(self, start: Token) -> exp.Command: 6481 while self._curr: 6482 self._advance() 6483 text = self._find_sql(start, self._prev) 6484 size = len(start.text) 6485 self._warn_unsupported() 6486 return exp.Command(this=text[:size], expression=text[size:]) 6487 6488 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6489 settings = [] 6490 6491 self._match_l_paren() 6492 kind = self._parse_id_var() 6493 6494 if self._match(TokenType.L_PAREN): 6495 while True: 6496 key = self._parse_id_var() 6497 value = self._parse_primary() 6498 6499 if not key and value is None: 6500 break 6501 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6502 self._match(TokenType.R_PAREN) 6503 6504 self._match_r_paren() 6505 6506 return self.expression( 6507 exp.DictProperty, 6508 this=this, 6509 kind=kind.this if kind else None, 6510 settings=settings, 6511 ) 6512 6513 def _parse_dict_range(self, this: str) -> exp.DictRange: 6514 self._match_l_paren() 6515 has_min = self._match_text_seq("MIN") 6516 if has_min: 6517 min = self._parse_var() or self._parse_primary() 6518 self._match_text_seq("MAX") 6519 max = self._parse_var() or self._parse_primary() 6520 else: 6521 max = self._parse_var() or self._parse_primary() 6522 min = exp.Literal.number(0) 6523 self._match_r_paren() 6524 return self.expression(exp.DictRange, this=this, min=min, max=max) 6525 6526 def _parse_comprehension( 6527 self, this: t.Optional[exp.Expression] 6528 ) -> t.Optional[exp.Comprehension]: 6529 index = self._index 6530 expression = self._parse_column() 6531 if not 
self._match(TokenType.IN): 6532 self._retreat(index - 1) 6533 return None 6534 iterator = self._parse_column() 6535 condition = self._parse_assignment() if self._match_text_seq("IF") else None 6536 return self.expression( 6537 exp.Comprehension, 6538 this=this, 6539 expression=expression, 6540 iterator=iterator, 6541 condition=condition, 6542 ) 6543 6544 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6545 if self._match(TokenType.HEREDOC_STRING): 6546 return self.expression(exp.Heredoc, this=self._prev.text) 6547 6548 if not self._match_text_seq("$"): 6549 return None 6550 6551 tags = ["$"] 6552 tag_text = None 6553 6554 if self._is_connected(): 6555 self._advance() 6556 tags.append(self._prev.text.upper()) 6557 else: 6558 self.raise_error("No closing $ found") 6559 6560 if tags[-1] != "$": 6561 if self._is_connected() and self._match_text_seq("$"): 6562 tag_text = tags[-1] 6563 tags.append("$") 6564 else: 6565 self.raise_error("No closing $ found") 6566 6567 heredoc_start = self._curr 6568 6569 while self._curr: 6570 if self._match_text_seq(*tags, advance=False): 6571 this = self._find_sql(heredoc_start, self._prev) 6572 self._advance(len(tags)) 6573 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6574 6575 self._advance() 6576 6577 self.raise_error(f"No closing {''.join(tags)} found") 6578 return None 6579 6580 def _find_parser( 6581 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6582 ) -> t.Optional[t.Callable]: 6583 if not self._curr: 6584 return None 6585 6586 index = self._index 6587 this = [] 6588 while True: 6589 # The current token might be multiple words 6590 curr = self._curr.text.upper() 6591 key = curr.split(" ") 6592 this.append(curr) 6593 6594 self._advance() 6595 result, trie = in_trie(trie, key) 6596 if result == TrieResult.FAILED: 6597 break 6598 6599 if result == TrieResult.EXISTS: 6600 subparser = parsers[" ".join(this)] 6601 return subparser 6602 6603 self._retreat(index) 6604 return None 6605 6606 def _match(self, token_type, advance=True, expression=None): 6607 if not self._curr: 6608 return None 6609 6610 if self._curr.token_type == token_type: 6611 if advance: 6612 self._advance() 6613 self._add_comments(expression) 6614 return True 6615 6616 return None 6617 6618 def _match_set(self, types, advance=True): 6619 if not self._curr: 6620 return None 6621 6622 if self._curr.token_type in types: 6623 if advance: 6624 self._advance() 6625 return True 6626 6627 return None 6628 6629 def _match_pair(self, token_type_a, token_type_b, advance=True): 6630 if not self._curr or not self._next: 6631 return None 6632 6633 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6634 if advance: 6635 self._advance(2) 6636 return True 6637 6638 return None 6639 6640 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6641 if not self._match(TokenType.L_PAREN, expression=expression): 6642 self.raise_error("Expecting (") 6643 6644 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6645 if not self._match(TokenType.R_PAREN, expression=expression): 6646 self.raise_error("Expecting )") 6647 6648 def _match_texts(self, texts, advance=True): 6649 if self._curr and self._curr.text.upper() in texts: 6650 if advance: 6651 self._advance() 6652 return True 6653 return None 6654 6655 def _match_text_seq(self, *texts, advance=True): 6656 index = self._index 6657 for text in texts: 6658 if self._curr and self._curr.text.upper() == text: 6659 self._advance() 6660 else: 6661 
self._retreat(index) 6662 return None 6663 6664 if not advance: 6665 self._retreat(index) 6666 6667 return True 6668 6669 def _replace_lambda( 6670 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 6671 ) -> t.Optional[exp.Expression]: 6672 if not node: 6673 return node 6674 6675 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 6676 6677 for column in node.find_all(exp.Column): 6678 typ = lambda_types.get(column.parts[0].name) 6679 if typ is not None: 6680 dot_or_id = column.to_dot() if column.table else column.this 6681 6682 if typ: 6683 dot_or_id = self.expression( 6684 exp.Cast, 6685 this=dot_or_id, 6686 to=typ, 6687 ) 6688 6689 parent = column.parent 6690 6691 while isinstance(parent, exp.Dot): 6692 if not isinstance(parent.parent, exp.Dot): 6693 parent.replace(dot_or_id) 6694 break 6695 parent = parent.parent 6696 else: 6697 if column is node: 6698 node = dot_or_id 6699 else: 6700 column.replace(dot_or_id) 6701 return node 6702 6703 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6704 start = self._prev 6705 6706 # Not to be confused with TRUNCATE(number, decimals) function call 6707 if self._match(TokenType.L_PAREN): 6708 self._retreat(self._index - 2) 6709 return self._parse_function() 6710 6711 # Clickhouse supports TRUNCATE DATABASE as well 6712 is_database = self._match(TokenType.DATABASE) 6713 6714 self._match(TokenType.TABLE) 6715 6716 exists = self._parse_exists(not_=False) 6717 6718 expressions = self._parse_csv( 6719 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6720 ) 6721 6722 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6723 6724 if self._match_text_seq("RESTART", "IDENTITY"): 6725 identity = "RESTART" 6726 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6727 identity = "CONTINUE" 6728 else: 6729 identity = None 6730 6731 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6732 option = self._prev.text 6733 else: 6734 option = None 6735 6736 partition = self._parse_partition() 6737 6738 # Fallback case 6739 if self._curr: 6740 return self._parse_as_command(start) 6741 6742 return self.expression( 6743 exp.TruncateTable, 6744 expressions=expressions, 6745 is_database=is_database, 6746 exists=exists, 6747 cluster=cluster, 6748 identity=identity, 6749 option=option, 6750 partition=partition, 6751 ) 6752 6753 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6754 this = self._parse_ordered(self._parse_opclass) 6755 6756 if not self._match(TokenType.WITH): 6757 return this 6758 6759 op = self._parse_var(any_token=True) 6760 6761 return self.expression(exp.WithOperator, this=this, op=op) 6762 6763 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6764 self._match(TokenType.EQ) 6765 self._match(TokenType.L_PAREN) 6766 6767 opts: t.List[t.Optional[exp.Expression]] = [] 6768 while self._curr and not self._match(TokenType.R_PAREN): 6769 if self._match_text_seq("FORMAT_NAME", "="): 6770 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 6771 # so we parse it separately to use _parse_field() 6772 prop = self.expression( 6773 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 6774 ) 6775 opts.append(prop) 6776 else: 6777 opts.append(self._parse_property()) 6778 6779 self._match(TokenType.COMMA) 6780 6781 return opts 6782 6783 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6784 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else 
None 6785 6786 options = [] 6787 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6788 option = self._parse_var(any_token=True) 6789 prev = self._prev.text.upper() 6790 6791 # Different dialects might separate options and values by white space, "=" and "AS" 6792 self._match(TokenType.EQ) 6793 self._match(TokenType.ALIAS) 6794 6795 param = self.expression(exp.CopyParameter, this=option) 6796 6797 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 6798 TokenType.L_PAREN, advance=False 6799 ): 6800 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 6801 param.set("expressions", self._parse_wrapped_options()) 6802 elif prev == "FILE_FORMAT": 6803 # T-SQL's external file format case 6804 param.set("expression", self._parse_field()) 6805 else: 6806 param.set("expression", self._parse_unquoted_field()) 6807 6808 options.append(param) 6809 self._match(sep) 6810 6811 return options 6812 6813 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6814 expr = self.expression(exp.Credentials) 6815 6816 if self._match_text_seq("STORAGE_INTEGRATION", "="): 6817 expr.set("storage", self._parse_field()) 6818 if self._match_text_seq("CREDENTIALS"): 6819 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 6820 creds = ( 6821 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6822 ) 6823 expr.set("credentials", creds) 6824 if self._match_text_seq("ENCRYPTION"): 6825 expr.set("encryption", self._parse_wrapped_options()) 6826 if self._match_text_seq("IAM_ROLE"): 6827 expr.set("iam_role", self._parse_field()) 6828 if self._match_text_seq("REGION"): 6829 expr.set("region", self._parse_field()) 6830 6831 return expr 6832 6833 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6834 return self._parse_field() 6835 6836 def _parse_copy(self) -> exp.Copy | exp.Command: 6837 start = self._prev 6838 6839 self._match(TokenType.INTO) 6840 6841 this = ( 6842 self._parse_select(nested=True, parse_subquery_alias=False) 6843 if self._match(TokenType.L_PAREN, advance=False) 6844 else self._parse_table(schema=True) 6845 ) 6846 6847 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6848 6849 files = self._parse_csv(self._parse_file_location) 6850 credentials = self._parse_credentials() 6851 6852 self._match_text_seq("WITH") 6853 6854 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6855 6856 # Fallback case 6857 if self._curr: 6858 return self._parse_as_command(start) 6859 6860 return self.expression( 6861 exp.Copy, 6862 this=this, 6863 kind=kind, 6864 credentials=credentials, 6865 files=files, 6866 params=params, 6867 )
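An illustrative sketch (not part of the source) of how the bracket parsing above surfaces through the public API, assuming the duckdb dialect; results are indicative:

    import sqlglot
    from sqlglot import exp

    # _parse_bracket: {...} literals become exp.Struct, a bare [...] becomes
    # exp.Array, and expr[...] becomes exp.Bracket (with the dialect's index
    # offset applied to the subscript)
    struct = sqlglot.parse_one("SELECT {'a': 1}", read="duckdb").find(exp.Struct)
    array = sqlglot.parse_one("SELECT [1, 2]", read="duckdb").find(exp.Array)
    bracket = sqlglot.parse_one("SELECT arr[1] FROM t", read="duckdb").find(exp.Bracket)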
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3 (see the construction sketch below)
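A minimal construction sketch, assuming direct use of sqlglot.parser.Parser; the arguments mirror the list above:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    # dialect defaults to the generic SQL dialect when omitted
    parser = Parser(error_level=ErrorLevel.RAISE, error_message_context=50, max_errors=5)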
1242 def __init__( 1243 self, 1244 error_level: t.Optional[ErrorLevel] = None, 1245 error_message_context: int = 100, 1246 max_errors: int = 3, 1247 dialect: DialectType = None, 1248 ): 1249 from sqlglot.dialects import Dialect 1250 1251 self.error_level = error_level or ErrorLevel.IMMEDIATE 1252 self.error_message_context = error_message_context 1253 self.max_errors = max_errors 1254 self.dialect = Dialect.get_or_raise(dialect) 1255 self.reset()
1267 def parse( 1268 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1269 ) -> t.List[t.Optional[exp.Expression]]: 1270 """ 1271 Parses a list of tokens and returns a list of syntax trees, one tree 1272 per parsed SQL statement. 1273 1274 Args: 1275 raw_tokens: The list of tokens. 1276 sql: The original SQL string, used to produce helpful debug messages. 1277 1278 Returns: 1279 The list of the produced syntax trees. 1280 """ 1281 return self._parse( 1282 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1283 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
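A usage sketch: tokens are typically produced with sqlglot.tokenize, and the tokenizer and parser dialects should match (both default to the generic dialect here):

    import sqlglot
    from sqlglot.parser import Parser

    sql = "SELECT a FROM t; SELECT b FROM u"
    tokens = sqlglot.tokenize(sql)

    trees = Parser().parse(tokens, sql)  # one syntax tree per statement
    assert len(trees) == 2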
1285 def parse_into( 1286 self, 1287 expression_types: exp.IntoType, 1288 raw_tokens: t.List[Token], 1289 sql: t.Optional[str] = None, 1290 ) -> t.List[t.Optional[exp.Expression]]: 1291 """ 1292 Parses a list of tokens into a given Expression type. If a collection of Expression 1293 types is given instead, this method will try to parse the token list into each one 1294 of them, stopping at the first for which the parsing succeeds. 1295 1296 Args: 1297 expression_types: The expression type(s) to try and parse the token list into. 1298 raw_tokens: The list of tokens. 1299 sql: The original SQL string, used to produce helpful debug messages. 1300 1301 Returns: 1302 The target Expression. 1303 """ 1304 errors = [] 1305 for expression_type in ensure_list(expression_types): 1306 parser = self.EXPRESSION_PARSERS.get(expression_type) 1307 if not parser: 1308 raise TypeError(f"No parser registered for {expression_type}") 1309 1310 try: 1311 return self._parse(parser, raw_tokens, sql) 1312 except ParseError as e: 1313 e.errors[0]["into_expression"] = expression_type 1314 errors.append(e) 1315 1316 raise ParseError( 1317 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1318 errors=merge_errors(errors), 1319 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
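A sketch of parsing directly into a registered expression type; the same behavior is exposed through parse_one's into argument:

    import sqlglot
    from sqlglot import exp
    from sqlglot.parser import Parser

    sql = "SELECT * FROM t"
    trees = Parser().parse_into(exp.Select, sqlglot.tokenize(sql), sql)

    # equivalent public shortcut
    select = sqlglot.parse_one(sql, into=exp.Select)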
1359 def check_errors(self) -> None: 1360 """Logs or raises any found errors, depending on the chosen error level setting.""" 1361 if self.error_level == ErrorLevel.WARN: 1362 for error in self.errors: 1363 logger.error(str(error)) 1364 elif self.error_level == ErrorLevel.RAISE and self.errors: 1365 raise ParseError( 1366 concat_messages(self.errors, self.max_errors), 1367 errors=merge_errors(self.errors), 1368 )
Logs or raises any found errors, depending on the chosen error level setting.
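A sketch of the error levels in action, using an intentionally invalid cast; the partial trees returned under WARN are indicative only:

    import sqlglot
    from sqlglot.errors import ErrorLevel, ParseError
    from sqlglot.parser import Parser

    sql = "SELECT CAST(x AS)"  # missing target type
    tokens = sqlglot.tokenize(sql)

    Parser(error_level=ErrorLevel.WARN).parse(tokens, sql)   # logs errors, returns partial trees
    try:
        Parser(error_level=ErrorLevel.RAISE).parse(tokens, sql)
    except ParseError as e:
        print(e.errors)  # structured error dicts collected during parsing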
1370 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1371 """ 1372 Appends an error in the list of recorded errors or raises it, depending on the chosen 1373 error level setting. 1374 """ 1375 token = token or self._curr or self._prev or Token.string("") 1376 start = token.start 1377 end = token.end + 1 1378 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1379 highlight = self.sql[start:end] 1380 end_context = self.sql[end : end + self.error_message_context] 1381 1382 error = ParseError.new( 1383 f"{message}. Line {token.line}, Col: {token.col}.\n" 1384 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1385 description=message, 1386 line=token.line, 1387 col=token.col, 1388 start_context=start_context, 1389 highlight=highlight, 1390 end_context=end_context, 1391 ) 1392 1393 if self.error_level == ErrorLevel.IMMEDIATE: 1394 raise error 1395 1396 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
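Under the default IMMEDIATE level the first error raises at once; the structured fields set here are available on the exception:

    import sqlglot
    from sqlglot.errors import ParseError

    try:
        sqlglot.parse_one("SELECT CASE WHEN a THEN b")  # missing END
    except ParseError as e:
        err = e.errors[0]
        print(err["description"], err["line"], err["col"])  # "Expected END after CASE", 1, ...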
1398 def expression( 1399 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1400 ) -> E: 1401 """ 1402 Creates a new, validated Expression. 1403 1404 Args: 1405 exp_class: The expression class to instantiate. 1406 comments: An optional list of comments to attach to the expression. 1407 kwargs: The arguments to set for the expression along with their respective values. 1408 1409 Returns: 1410 The target expression. 1411 """ 1412 instance = exp_class(**kwargs) 1413 instance.add_comments(comments) if comments else self._add_comments(instance) 1414 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
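A sketch of building a node through this helper; validation runs as part of the call:

    from sqlglot import exp
    from sqlglot.parser import Parser

    node = Parser().expression(exp.Not, this=exp.column("x"))
    assert node.sql() == "NOT x"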
1421 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1422 """ 1423 Validates an Expression, making sure that all its mandatory arguments are set. 1424 1425 Args: 1426 expression: The expression to validate. 1427 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1428 1429 Returns: 1430 The validated expression. 1431 """ 1432 if self.error_level != ErrorLevel.IGNORE: 1433 for error_message in expression.error_messages(args): 1434 self.raise_error(error_message) 1435 1436 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
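A sketch showing that validation is skipped under ErrorLevel.IGNORE; with the default IMMEDIATE level the same call would raise, since exp.Not declares its this argument as mandatory:

    from sqlglot import exp
    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser

    parser = Parser(error_level=ErrorLevel.IGNORE)
    node = parser.validate_expression(exp.Not())  # returned unchanged, no error reported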