sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass
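

# Illustrative sketch (not in the original source): the build_* helpers above
# turn flat argument lists into AST nodes, and the _Parser metaclass precomputes
# keyword tries for the SHOW/SET parsers. For example, VAR_MAP arguments
# alternate keys and values:
#
#     >>> from sqlglot import exp
#     >>> node = build_var_map([exp.Literal.string("a"), exp.Literal.number(1)])
#     >>> isinstance(node, exp.VarMap)
#     True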


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "MOD": lambda args: exp.Mod(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": build_var_map,
    }
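
    # Illustrative sketch (not in the original source): each FUNCTIONS entry maps
    # an upper-cased SQL function name to a builder over the parsed argument list
    # (two-argument builders also receive the active dialect), e.g.:
    #
    #     >>> from sqlglot import exp
    #     >>> Parser.FUNCTIONS["LOG2"]([exp.column("x")]).sql()
    #     'LOG(2, x)'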

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.MODEL,
        TokenType.DICTIONARY,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        TokenType.FOREIGN_KEY,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.COLON_EQ: exp.PropertyEQ,
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()
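
    # Illustrative sketch (not in the original source): EQUALITY, COMPARISON,
    # BITWISE, TERM and FACTOR form a precedence ladder; each level parses
    # operands at the next-tighter level and folds its own operators left to
    # right, roughly like this hypothetical helper:
    #
    #     def _parse_term_sketch(self):  # hypothetical, mirrors the real levels
    #         this = self._parse_factor_sketch()
    #         while self._curr and self._curr.token_type in self.TERM:
    #             klass = self.TERM[self._curr.token_type]
    #             self._advance()
    #             this = self.expression(klass, this=this, expression=self._parse_factor_sketch())
    #         return this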

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }
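
    # Illustrative sketch (not in the original source): EXPRESSION_PARSERS is the
    # table that parse_into (defined further below) consults when asked for a
    # specific node type, e.g.:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sqlglot.parse_one("x > 1 AND y", into=exp.Condition).sql()
    #     'x > 1 AND y'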

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
    }
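
    # Illustrative sketch (not in the original source): _parse_statement (further
    # below) dispatches on the current token through this table, and dialects
    # customize statement handling by extending it, e.g. this hypothetical
    # subclass:
    #
    #     class MyParser(Parser):  # hypothetical
    #         STATEMENT_PARSERS = {
    #             **Parser.STATEMENT_PARSERS,
    #             TokenType.SHOW: lambda self: self._parse_command(),
    #         }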

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
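
    # Illustrative sketch (not in the original source): PROPERTY_PARSERS keys are
    # matched against raw keyword text while parsing DDL properties, e.g.:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ddl = sqlglot.parse_one("CREATE TABLE t (x INT) ENGINE=InnoDB")
    #     >>> ddl.find(exp.EngineProperty) is not None
    #     True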

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_conjunction),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
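
    # Illustrative usage (not in the original source): parse pairs with the
    # dialect's tokenizer; sqlglot.parse wraps exactly this flow, e.g.:
    #
    #     >>> from sqlglot.dialects import Dialect
    #     >>> dialect = Dialect.get_or_raise("duckdb")
    #     >>> sql = "SELECT 1; SELECT 2"
    #     >>> [e.sql() for e in dialect.parser().parse(dialect.tokenize(sql), sql)]
    #     ['SELECT 1', 'SELECT 2']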
1192 """ 1193 errors = [] 1194 for expression_type in ensure_list(expression_types): 1195 parser = self.EXPRESSION_PARSERS.get(expression_type) 1196 if not parser: 1197 raise TypeError(f"No parser registered for {expression_type}") 1198 1199 try: 1200 return self._parse(parser, raw_tokens, sql) 1201 except ParseError as e: 1202 e.errors[0]["into_expression"] = expression_type 1203 errors.append(e) 1204 1205 raise ParseError( 1206 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1207 errors=merge_errors(errors), 1208 ) from errors[-1] 1209 1210 def _parse( 1211 self, 1212 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1213 raw_tokens: t.List[Token], 1214 sql: t.Optional[str] = None, 1215 ) -> t.List[t.Optional[exp.Expression]]: 1216 self.reset() 1217 self.sql = sql or "" 1218 1219 total = len(raw_tokens) 1220 chunks: t.List[t.List[Token]] = [[]] 1221 1222 for i, token in enumerate(raw_tokens): 1223 if token.token_type == TokenType.SEMICOLON: 1224 if i < total - 1: 1225 chunks.append([]) 1226 else: 1227 chunks[-1].append(token) 1228 1229 expressions = [] 1230 1231 for tokens in chunks: 1232 self._index = -1 1233 self._tokens = tokens 1234 self._advance() 1235 1236 expressions.append(parse_method(self)) 1237 1238 if self._index < len(self._tokens): 1239 self.raise_error("Invalid expression / Unexpected token") 1240 1241 self.check_errors() 1242 1243 return expressions 1244 1245 def check_errors(self) -> None: 1246 """Logs or raises any found errors, depending on the chosen error level setting.""" 1247 if self.error_level == ErrorLevel.WARN: 1248 for error in self.errors: 1249 logger.error(str(error)) 1250 elif self.error_level == ErrorLevel.RAISE and self.errors: 1251 raise ParseError( 1252 concat_messages(self.errors, self.max_errors), 1253 errors=merge_errors(self.errors), 1254 ) 1255 1256 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1257 """ 1258 Appends an error in the list of recorded errors or raises it, depending on the chosen 1259 error level setting. 1260 """ 1261 token = token or self._curr or self._prev or Token.string("") 1262 start = token.start 1263 end = token.end + 1 1264 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1265 highlight = self.sql[start:end] 1266 end_context = self.sql[end : end + self.error_message_context] 1267 1268 error = ParseError.new( 1269 f"{message}. Line {token.line}, Col: {token.col}.\n" 1270 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1271 description=message, 1272 line=token.line, 1273 col=token.col, 1274 start_context=start_context, 1275 highlight=highlight, 1276 end_context=end_context, 1277 ) 1278 1279 if self.error_level == ErrorLevel.IMMEDIATE: 1280 raise error 1281 1282 self.errors.append(error) 1283 1284 def expression( 1285 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1286 ) -> E: 1287 """ 1288 Creates a new, validated Expression. 1289 1290 Args: 1291 exp_class: The expression class to instantiate. 1292 comments: An optional list of comments to attach to the expression. 1293 kwargs: The arguments to set for the expression along with their respective values. 1294 1295 Returns: 1296 The target expression. 
1297 """ 1298 instance = exp_class(**kwargs) 1299 instance.add_comments(comments) if comments else self._add_comments(instance) 1300 return self.validate_expression(instance) 1301 1302 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1303 if expression and self._prev_comments: 1304 expression.add_comments(self._prev_comments) 1305 self._prev_comments = None 1306 1307 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1308 """ 1309 Validates an Expression, making sure that all its mandatory arguments are set. 1310 1311 Args: 1312 expression: The expression to validate. 1313 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1314 1315 Returns: 1316 The validated expression. 1317 """ 1318 if self.error_level != ErrorLevel.IGNORE: 1319 for error_message in expression.error_messages(args): 1320 self.raise_error(error_message) 1321 1322 return expression 1323 1324 def _find_sql(self, start: Token, end: Token) -> str: 1325 return self.sql[start.start : end.end + 1] 1326 1327 def _is_connected(self) -> bool: 1328 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1329 1330 def _advance(self, times: int = 1) -> None: 1331 self._index += times 1332 self._curr = seq_get(self._tokens, self._index) 1333 self._next = seq_get(self._tokens, self._index + 1) 1334 1335 if self._index > 0: 1336 self._prev = self._tokens[self._index - 1] 1337 self._prev_comments = self._prev.comments 1338 else: 1339 self._prev = None 1340 self._prev_comments = None 1341 1342 def _retreat(self, index: int) -> None: 1343 if index != self._index: 1344 self._advance(index - self._index) 1345 1346 def _warn_unsupported(self) -> None: 1347 if len(self._tokens) <= 1: 1348 return 1349 1350 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1351 # interested in emitting a warning for the one being currently processed. 1352 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1353 1354 logger.warning( 1355 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1356 ) 1357 1358 def _parse_command(self) -> exp.Command: 1359 self._warn_unsupported() 1360 return self.expression( 1361 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1362 ) 1363 1364 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1365 """ 1366 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to solve
        this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(self) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
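
    # Illustrative note (not in the original source): _parse_statement tries the
    # STATEMENT_PARSERS token dispatch first and falls back to a bare expression
    # or SELECT, which is why both of these parse, e.g.:
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("UPDATE t SET x = 1")).__name__
    #     'Update'
    #     >>> type(sqlglot.parse_one("1 + 1")).__name__
    #     'Add'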

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq
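
    # Illustrative usage (not in the original source): _parse_create produces
    # exp.Create nodes whose kind/expression reflect the parsed DDL, e.g.:
    #
    #     >>> import sqlglot
    #     >>> create = sqlglot.parse_one("CREATE TABLE t AS SELECT 1 AS x")
    #     >>> create.args["kind"], type(create.expression).__name__
    #     ('TABLE', 'Select')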

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_bitwise() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )
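
    # Dispatch overview (illustrative): _parse_property first tries the keyword-driven
    # PROPERTY_PARSERS table, then the COMPOUND SORTKEY / SQL SECURITY special cases,
    # and finally falls back to a generic `key = value` exp.Property; when no "="
    # follows the key it retreats and tries sequence options instead.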

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )
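
    # Context (illustrative): most of the properties above (FALLBACK, JOURNAL,
    # CHECKSUM, FREESPACE, MERGEBLOCKRATIO, DATABLOCKSIZE, BLOCKCOMPRESSION,
    # ISOLATED LOADING) model Teradata CREATE TABLE options; _parse_property_before
    # routes to them via PROPERTY_PARSERS with the NO/DUAL/MIN/MAX prefixes
    # captured as kwargs.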
lock_type = "EXCLUSIVE" 2011 elif self._match_text_seq("SHARE"): 2012 lock_type = "SHARE" 2013 elif self._match_text_seq("READ"): 2014 lock_type = "READ" 2015 elif self._match_text_seq("WRITE"): 2016 lock_type = "WRITE" 2017 elif self._match_text_seq("CHECKSUM"): 2018 lock_type = "CHECKSUM" 2019 else: 2020 lock_type = None 2021 2022 override = self._match_text_seq("OVERRIDE") 2023 2024 return self.expression( 2025 exp.LockingProperty, 2026 this=this, 2027 kind=kind, 2028 for_or_in=for_or_in, 2029 lock_type=lock_type, 2030 override=override, 2031 ) 2032 2033 def _parse_partition_by(self) -> t.List[exp.Expression]: 2034 if self._match(TokenType.PARTITION_BY): 2035 return self._parse_csv(self._parse_conjunction) 2036 return [] 2037 2038 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2039 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2040 if self._match_text_seq("MINVALUE"): 2041 return exp.var("MINVALUE") 2042 if self._match_text_seq("MAXVALUE"): 2043 return exp.var("MAXVALUE") 2044 return self._parse_bitwise() 2045 2046 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2047 expression = None 2048 from_expressions = None 2049 to_expressions = None 2050 2051 if self._match(TokenType.IN): 2052 this = self._parse_wrapped_csv(self._parse_bitwise) 2053 elif self._match(TokenType.FROM): 2054 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2055 self._match_text_seq("TO") 2056 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2057 elif self._match_text_seq("WITH", "(", "MODULUS"): 2058 this = self._parse_number() 2059 self._match_text_seq(",", "REMAINDER") 2060 expression = self._parse_number() 2061 self._match_r_paren() 2062 else: 2063 self.raise_error("Failed to parse partition bound spec.") 2064 2065 return self.expression( 2066 exp.PartitionBoundSpec, 2067 this=this, 2068 expression=expression, 2069 from_expressions=from_expressions, 2070 to_expressions=to_expressions, 2071 ) 2072 2073 # https://www.postgresql.org/docs/current/sql-createtable.html 2074 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2075 if not self._match_text_seq("OF"): 2076 self._retreat(self._index - 1) 2077 return None 2078 2079 this = self._parse_table(schema=True) 2080 2081 if self._match(TokenType.DEFAULT): 2082 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2083 elif self._match_text_seq("FOR", "VALUES"): 2084 expression = self._parse_partition_bound_spec() 2085 else: 2086 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2087 2088 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2089 2090 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2091 self._match(TokenType.EQ) 2092 return self.expression( 2093 exp.PartitionedByProperty, 2094 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2095 ) 2096 2097 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2098 if self._match_text_seq("AND", "STATISTICS"): 2099 statistics = True 2100 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2101 statistics = False 2102 else: 2103 statistics = None 2104 2105 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2106 2107 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2108 if self._match_text_seq("SQL"): 2109 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2110 return None 2111 2112 def _parse_modifies_property(self) -> 

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper()
        if not self._match_set(self.ID_VAR_TOKENS, advance=False):
            style = None
            self._retreat(self._index - 1)
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, style=style, kind=kind, expressions=expressions
        )

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = self._parse_table(schema=True) if not is_function else self._parse_function()

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()
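
    # Illustrative example: both Postgres "ON CONFLICT (id) DO UPDATE SET x = 1" and
    # MySQL "ON DUPLICATE KEY UPDATE x = 1" normalize to exp.OnConflict; only the
    # former can carry conflict_keys or an ON CONSTRAINT name, while duplicate=True
    # marks the MySQL flavor.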

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_properties()
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )
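
    # Note on the RETURNING handling above (illustrative): _parse_insert,
    # _parse_delete and _parse_update each call _parse_returning twice because some
    # dialects emit the clause mid-statement (e.g. T-SQL's OUTPUT, tokenized as
    # RETURNING, precedes the source expression) while Postgres-style RETURNING
    # trails the statement; whichever call matches wins via
    # `returning or self._parse_returning()`.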
self.raise_error("Expecting TABLE after UNCACHE") 2411 2412 return self.expression( 2413 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2414 ) 2415 2416 def _parse_cache(self) -> exp.Cache: 2417 lazy = self._match_text_seq("LAZY") 2418 self._match(TokenType.TABLE) 2419 table = self._parse_table(schema=True) 2420 2421 options = [] 2422 if self._match_text_seq("OPTIONS"): 2423 self._match_l_paren() 2424 k = self._parse_string() 2425 self._match(TokenType.EQ) 2426 v = self._parse_string() 2427 options = [k, v] 2428 self._match_r_paren() 2429 2430 self._match(TokenType.ALIAS) 2431 return self.expression( 2432 exp.Cache, 2433 this=table, 2434 lazy=lazy, 2435 options=options, 2436 expression=self._parse_select(nested=True), 2437 ) 2438 2439 def _parse_partition(self) -> t.Optional[exp.Partition]: 2440 if not self._match(TokenType.PARTITION): 2441 return None 2442 2443 return self.expression( 2444 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2445 ) 2446 2447 def _parse_value(self) -> exp.Tuple: 2448 if self._match(TokenType.L_PAREN): 2449 expressions = self._parse_csv(self._parse_expression) 2450 self._match_r_paren() 2451 return self.expression(exp.Tuple, expressions=expressions) 2452 2453 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2454 return self.expression(exp.Tuple, expressions=[self._parse_expression()]) 2455 2456 def _parse_projections(self) -> t.List[exp.Expression]: 2457 return self._parse_expressions() 2458 2459 def _parse_select( 2460 self, 2461 nested: bool = False, 2462 table: bool = False, 2463 parse_subquery_alias: bool = True, 2464 parse_set_operation: bool = True, 2465 ) -> t.Optional[exp.Expression]: 2466 cte = self._parse_with() 2467 2468 if cte: 2469 this = self._parse_statement() 2470 2471 if not this: 2472 self.raise_error("Failed to parse any statement following CTE") 2473 return cte 2474 2475 if "with" in this.arg_types: 2476 this.set("with", cte) 2477 else: 2478 self.raise_error(f"{this.key} does not support CTE") 2479 this = cte 2480 2481 return this 2482 2483 # duckdb supports leading with FROM x 2484 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2485 2486 if self._match(TokenType.SELECT): 2487 comments = self._prev_comments 2488 2489 hint = self._parse_hint() 2490 all_ = self._match(TokenType.ALL) 2491 distinct = self._match_set(self.DISTINCT_TOKENS) 2492 2493 kind = ( 2494 self._match(TokenType.ALIAS) 2495 and self._match_texts(("STRUCT", "VALUE")) 2496 and self._prev.text.upper() 2497 ) 2498 2499 if distinct: 2500 distinct = self.expression( 2501 exp.Distinct, 2502 on=self._parse_value() if self._match(TokenType.ON) else None, 2503 ) 2504 2505 if all_ and distinct: 2506 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2507 2508 limit = self._parse_limit(top=True) 2509 projections = self._parse_projections() 2510 2511 this = self.expression( 2512 exp.Select, 2513 kind=kind, 2514 hint=hint, 2515 distinct=distinct, 2516 expressions=projections, 2517 limit=limit, 2518 ) 2519 this.comments = comments 2520 2521 into = self._parse_into() 2522 if into: 2523 this.set("into", into) 2524 2525 if not from_: 2526 from_ = self._parse_from() 2527 2528 if from_: 2529 this.set("from", from_) 2530 2531 this = self._parse_query_modifiers(this) 2532 elif (table or nested) and self._match(TokenType.L_PAREN): 2533 if self._match(TokenType.PIVOT): 2534 this = self._parse_simplified_pivot() 2535 elif self._match(TokenType.FROM): 2536 

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import (
            normalize_identifiers as _norm,
        )

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this
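
    # _implicit_unnests_to_explicit (above) rewrites BigQuery-style correlated joins
    # (illustrative): "SELECT * FROM t, t.arr" becomes "SELECT * FROM t, UNNEST(t.arr)".
    # A comma-joined table whose first name part refers back to an already-seen
    # source is converted into an exp.Unnest over the equivalent column reference.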

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args:
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_function() or self._parse_var(upper=True)
                ),
                [],
            ):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )
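
    # The MATCH_RECOGNIZE parser below captures the PATTERN body by scanning raw
    # tokens with a parenthesis counter and recovering the original SQL slice via
    # _find_sql(start, end), since regex-like patterns such as (A B+ C?) aren't
    # structures the expression parser understands.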

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )
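
    # Two conventions worth noting here (illustrative):
    # - In _parse_lateral above, cross_apply is tri-state: True for CROSS APPLY,
    #   False for OUTER APPLY, and None when plain LATERAL was used.
    # - _parse_join_parts below splits a join prefix into (method, side, kind), e.g.
    #   "NATURAL FULL OUTER JOIN" -> method=NATURAL, side=FULL, kind=OUTER.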

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not isinstance(kwargs["this"], exp.Unnest) and not (
            kind and kind.token_type == TokenType.CROSS
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_conjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
        )
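
    # _parse_index_params above covers the trailing clauses of CREATE INDEX across
    # dialects (illustrative summary): USING <method>, wrapped column lists,
    # Postgres INCLUDE (...) and partial-index WHERE, PARTITION BY, WITH (...)
    # storage options, and USING INDEX TABLESPACE <name>.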

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
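
    # Illustrative: _parse_table_parts maps dotted names positionally, so
    # "catalog.db.tbl" -> exp.Table(this=tbl, db=db, catalog=catalog); extra leading
    # parts nest into exp.Dot, and T-SQL's "a..b" keeps an empty-string placeholder
    # for the omitted middle part.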
self.raise_error(f"Expected table name but got {self._curr}") 3102 if not db and is_db_reference: 3103 self.raise_error(f"Expected database name but got {self._curr}") 3104 3105 return self.expression( 3106 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 3107 ) 3108 3109 def _parse_table( 3110 self, 3111 schema: bool = False, 3112 joins: bool = False, 3113 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3114 parse_bracket: bool = False, 3115 is_db_reference: bool = False, 3116 ) -> t.Optional[exp.Expression]: 3117 lateral = self._parse_lateral() 3118 if lateral: 3119 return lateral 3120 3121 unnest = self._parse_unnest() 3122 if unnest: 3123 return unnest 3124 3125 values = self._parse_derived_table_values() 3126 if values: 3127 return values 3128 3129 subquery = self._parse_select(table=True) 3130 if subquery: 3131 if not subquery.args.get("pivots"): 3132 subquery.set("pivots", self._parse_pivots()) 3133 return subquery 3134 3135 bracket = parse_bracket and self._parse_bracket(None) 3136 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3137 3138 only = self._match(TokenType.ONLY) 3139 3140 this = t.cast( 3141 exp.Expression, 3142 bracket 3143 or self._parse_bracket( 3144 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3145 ), 3146 ) 3147 3148 if only: 3149 this.set("only", only) 3150 3151 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3152 self._match_text_seq("*") 3153 3154 if schema: 3155 return self._parse_schema(this=this) 3156 3157 version = self._parse_version() 3158 3159 if version: 3160 this.set("version", version) 3161 3162 if self.dialect.ALIAS_POST_TABLESAMPLE: 3163 table_sample = self._parse_table_sample() 3164 3165 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3166 if alias: 3167 this.set("alias", alias) 3168 3169 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3170 return self.expression( 3171 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3172 ) 3173 3174 this.set("hints", self._parse_table_hints()) 3175 3176 if not this.args.get("pivots"): 3177 this.set("pivots", self._parse_pivots()) 3178 3179 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3180 table_sample = self._parse_table_sample() 3181 3182 if table_sample: 3183 table_sample.set("this", this) 3184 this = table_sample 3185 3186 if joins: 3187 for join in self._parse_joins(): 3188 this.append("joins", join) 3189 3190 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3191 this.set("ordinality", True) 3192 this.set("alias", self._parse_table_alias()) 3193 3194 return this 3195 3196 def _parse_version(self) -> t.Optional[exp.Version]: 3197 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3198 this = "TIMESTAMP" 3199 elif self._match(TokenType.VERSION_SNAPSHOT): 3200 this = "VERSION" 3201 else: 3202 return None 3203 3204 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3205 kind = self._prev.text.upper() 3206 start = self._parse_bitwise() 3207 self._match_texts(("TO", "AND")) 3208 end = self._parse_bitwise() 3209 expression: t.Optional[exp.Expression] = self.expression( 3210 exp.Tuple, expressions=[start, end] 3211 ) 3212 elif self._match_text_seq("CONTAINED", "IN"): 3213 kind = "CONTAINED IN" 3214 expression = self.expression( 3215 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3216 ) 3217 elif self._match(TokenType.ALL): 3218 kind = "ALL" 3219 expression = None 3220 else: 3221 
self._match_text_seq("AS", "OF") 3222 kind = "AS OF" 3223 expression = self._parse_type() 3224 3225 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3226 3227 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3228 if not self._match(TokenType.UNNEST): 3229 return None 3230 3231 expressions = self._parse_wrapped_csv(self._parse_equality) 3232 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3233 3234 alias = self._parse_table_alias() if with_alias else None 3235 3236 if alias: 3237 if self.dialect.UNNEST_COLUMN_ONLY: 3238 if alias.args.get("columns"): 3239 self.raise_error("Unexpected extra column alias in unnest.") 3240 3241 alias.set("columns", [alias.this]) 3242 alias.set("this", None) 3243 3244 columns = alias.args.get("columns") or [] 3245 if offset and len(expressions) < len(columns): 3246 offset = columns.pop() 3247 3248 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3249 self._match(TokenType.ALIAS) 3250 offset = self._parse_id_var( 3251 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3252 ) or exp.to_identifier("offset") 3253 3254 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3255 3256 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3257 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3258 if not is_derived and not self._match_text_seq("VALUES"): 3259 return None 3260 3261 expressions = self._parse_csv(self._parse_value) 3262 alias = self._parse_table_alias() 3263 3264 if is_derived: 3265 self._match_r_paren() 3266 3267 return self.expression( 3268 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3269 ) 3270 3271 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3272 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3273 as_modifier and self._match_text_seq("USING", "SAMPLE") 3274 ): 3275 return None 3276 3277 bucket_numerator = None 3278 bucket_denominator = None 3279 bucket_field = None 3280 percent = None 3281 size = None 3282 seed = None 3283 3284 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3285 matched_l_paren = self._match(TokenType.L_PAREN) 3286 3287 if self.TABLESAMPLE_CSV: 3288 num = None 3289 expressions = self._parse_csv(self._parse_primary) 3290 else: 3291 expressions = None 3292 num = ( 3293 self._parse_factor() 3294 if self._match(TokenType.NUMBER, advance=False) 3295 else self._parse_primary() or self._parse_placeholder() 3296 ) 3297 3298 if self._match_text_seq("BUCKET"): 3299 bucket_numerator = self._parse_number() 3300 self._match_text_seq("OUT", "OF") 3301 bucket_denominator = bucket_denominator = self._parse_number() 3302 self._match(TokenType.ON) 3303 bucket_field = self._parse_field() 3304 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3305 percent = num 3306 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3307 size = num 3308 else: 3309 percent = num 3310 3311 if matched_l_paren: 3312 self._match_r_paren() 3313 3314 if self._match(TokenType.L_PAREN): 3315 method = self._parse_var(upper=True) 3316 seed = self._match(TokenType.COMMA) and self._parse_number() 3317 self._match_r_paren() 3318 elif self._match_texts(("SEED", "REPEATABLE")): 3319 seed = self._parse_wrapped(self._parse_number) 3320 3321 return self.expression( 3322 exp.TableSample, 3323 expressions=expressions, 3324 method=method, 3325 bucket_numerator=bucket_numerator, 3326 

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_conjunction()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
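
    # Pivot output-column naming (illustrative): for each IN-value and each aliased
    # aggregation, a column name is composed from the two parts, ordered by
    # PREFIXED_PIVOT_COLUMNS, e.g. aggregation alias "total" and IN-value 'a' yield
    # "a_total" by default, or "total_a" when prefixed.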

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_conjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_conjunction()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)
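
    # _parse_connect above temporarily registers a "PRIOR" entry in
    # NO_PAREN_FUNCTION_PARSERS while the CONNECT BY condition is parsed, so that
    # "PRIOR col" yields exp.Prior only inside hierarchical queries; it is popped
    # again afterwards to avoid leaking the keyword into ordinary expressions.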
self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3542 exp.Prior, this=self._parse_bitwise() 3543 ) 3544 connect = self._parse_conjunction() 3545 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3546 3547 if not start and self._match(TokenType.START_WITH): 3548 start = self._parse_conjunction() 3549 3550 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3551 3552 def _parse_name_as_expression(self) -> exp.Alias: 3553 return self.expression( 3554 exp.Alias, 3555 alias=self._parse_id_var(any_token=True), 3556 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3557 ) 3558 3559 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3560 if self._match_text_seq("INTERPOLATE"): 3561 return self._parse_wrapped_csv(self._parse_name_as_expression) 3562 return None 3563 3564 def _parse_order( 3565 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3566 ) -> t.Optional[exp.Expression]: 3567 siblings = None 3568 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3569 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3570 return this 3571 3572 siblings = True 3573 3574 return self.expression( 3575 exp.Order, 3576 this=this, 3577 expressions=self._parse_csv(self._parse_ordered), 3578 interpolate=self._parse_interpolate(), 3579 siblings=siblings, 3580 ) 3581 3582 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3583 if not self._match(token): 3584 return None 3585 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3586 3587 def _parse_ordered( 3588 self, parse_method: t.Optional[t.Callable] = None 3589 ) -> t.Optional[exp.Ordered]: 3590 this = parse_method() if parse_method else self._parse_conjunction() 3591 if not this: 3592 return None 3593 3594 asc = self._match(TokenType.ASC) 3595 desc = self._match(TokenType.DESC) or (asc and False) 3596 3597 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3598 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3599 3600 nulls_first = is_nulls_first or False 3601 explicitly_null_ordered = is_nulls_first or is_nulls_last 3602 3603 if ( 3604 not explicitly_null_ordered 3605 and ( 3606 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3607 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3608 ) 3609 and self.dialect.NULL_ORDERING != "nulls_are_last" 3610 ): 3611 nulls_first = True 3612 3613 if self._match_text_seq("WITH", "FILL"): 3614 with_fill = self.expression( 3615 exp.WithFill, 3616 **{ # type: ignore 3617 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3618 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3619 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3620 }, 3621 ) 3622 else: 3623 with_fill = None 3624 3625 return self.expression( 3626 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3627 ) 3628 3629 def _parse_limit( 3630 self, 3631 this: t.Optional[exp.Expression] = None, 3632 top: bool = False, 3633 skip_limit_token: bool = False, 3634 ) -> t.Optional[exp.Expression]: 3635 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3636 comments = self._prev_comments 3637 if top: 3638 limit_paren = self._match(TokenType.L_PAREN) 3639 expression = self._parse_term() if limit_paren else self._parse_number() 3640 3641 if limit_paren: 3642 self._match_r_paren() 3643 else: 3644 expression = self._parse_term() 3645 3646 if self._match(TokenType.COMMA): 
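# This COMMA branch implements MySQL's "LIMIT <offset>, <count>" shorthand: the term parsed
# above becomes the offset and the next term the row count. Illustrative sketch, assuming a
# standard sqlglot install:
#
#   >>> import sqlglot
#   >>> sqlglot.transpile("SELECT * FROM t LIMIT 5, 10", read="mysql", write="postgres")
#   # expected (roughly): ['SELECT * FROM t LIMIT 10 OFFSET 5']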
3647 offset = expression 3648 expression = self._parse_term() 3649 else: 3650 offset = None 3651 3652 limit_exp = self.expression( 3653 exp.Limit, 3654 this=this, 3655 expression=expression, 3656 offset=offset, 3657 comments=comments, 3658 expressions=self._parse_limit_by(), 3659 ) 3660 3661 return limit_exp 3662 3663 if self._match(TokenType.FETCH): 3664 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3665 direction = self._prev.text.upper() if direction else "FIRST" 3666 3667 count = self._parse_field(tokens=self.FETCH_TOKENS) 3668 percent = self._match(TokenType.PERCENT) 3669 3670 self._match_set((TokenType.ROW, TokenType.ROWS)) 3671 3672 only = self._match_text_seq("ONLY") 3673 with_ties = self._match_text_seq("WITH", "TIES") 3674 3675 if only and with_ties: 3676 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3677 3678 return self.expression( 3679 exp.Fetch, 3680 direction=direction, 3681 count=count, 3682 percent=percent, 3683 with_ties=with_ties, 3684 ) 3685 3686 return this 3687 3688 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3689 if not self._match(TokenType.OFFSET): 3690 return this 3691 3692 count = self._parse_term() 3693 self._match_set((TokenType.ROW, TokenType.ROWS)) 3694 3695 return self.expression( 3696 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3697 ) 3698 3699 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3700 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3701 3702 def _parse_locks(self) -> t.List[exp.Lock]: 3703 locks = [] 3704 while True: 3705 if self._match_text_seq("FOR", "UPDATE"): 3706 update = True 3707 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3708 "LOCK", "IN", "SHARE", "MODE" 3709 ): 3710 update = False 3711 else: 3712 break 3713 3714 expressions = None 3715 if self._match_text_seq("OF"): 3716 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3717 3718 wait: t.Optional[bool | exp.Expression] = None 3719 if self._match_text_seq("NOWAIT"): 3720 wait = True 3721 elif self._match_text_seq("WAIT"): 3722 wait = self._parse_primary() 3723 elif self._match_text_seq("SKIP", "LOCKED"): 3724 wait = False 3725 3726 locks.append( 3727 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3728 ) 3729 3730 return locks 3731 3732 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3733 while this and self._match_set(self.SET_OPERATIONS): 3734 token_type = self._prev.token_type 3735 3736 if token_type == TokenType.UNION: 3737 operation = exp.Union 3738 elif token_type == TokenType.EXCEPT: 3739 operation = exp.Except 3740 else: 3741 operation = exp.Intersect 3742 3743 comments = self._prev.comments 3744 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3745 by_name = self._match_text_seq("BY", "NAME") 3746 expression = self._parse_select(nested=True, parse_set_operation=False) 3747 3748 this = self.expression( 3749 operation, 3750 comments=comments, 3751 this=this, 3752 distinct=distinct, 3753 by_name=by_name, 3754 expression=expression, 3755 ) 3756 3757 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3758 expression = this.expression 3759 3760 if expression: 3761 for arg in self.UNION_MODIFIERS: 3762 expr = expression.args.get(arg) 3763 if expr: 3764 this.set(arg, expr.pop()) 3765 3766 return this 3767 3768 def _parse_expression(self) -> 
t.Optional[exp.Expression]: 3769 return self._parse_alias(self._parse_conjunction()) 3770 3771 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3772 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3773 3774 def _parse_equality(self) -> t.Optional[exp.Expression]: 3775 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3776 3777 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3778 return self._parse_tokens(self._parse_range, self.COMPARISON) 3779 3780 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3781 this = this or self._parse_bitwise() 3782 negate = self._match(TokenType.NOT) 3783 3784 if self._match_set(self.RANGE_PARSERS): 3785 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3786 if not expression: 3787 return this 3788 3789 this = expression 3790 elif self._match(TokenType.ISNULL): 3791 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3792 3793 # Postgres supports ISNULL and NOTNULL for conditions. 3794 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3795 if self._match(TokenType.NOTNULL): 3796 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3797 this = self.expression(exp.Not, this=this) 3798 3799 if negate: 3800 this = self.expression(exp.Not, this=this) 3801 3802 if self._match(TokenType.IS): 3803 this = self._parse_is(this) 3804 3805 return this 3806 3807 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3808 index = self._index - 1 3809 negate = self._match(TokenType.NOT) 3810 3811 if self._match_text_seq("DISTINCT", "FROM"): 3812 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3813 return self.expression(klass, this=this, expression=self._parse_bitwise()) 3814 3815 expression = self._parse_null() or self._parse_boolean() 3816 if not expression: 3817 self._retreat(index) 3818 return None 3819 3820 this = self.expression(exp.Is, this=this, expression=expression) 3821 return self.expression(exp.Not, this=this) if negate else this 3822 3823 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3824 unnest = self._parse_unnest(with_alias=False) 3825 if unnest: 3826 this = self.expression(exp.In, this=this, unnest=unnest) 3827 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3828 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3829 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3830 3831 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 3832 this = self.expression(exp.In, this=this, query=expressions[0]) 3833 else: 3834 this = self.expression(exp.In, this=this, expressions=expressions) 3835 3836 if matched_l_paren: 3837 self._match_r_paren(this) 3838 elif not self._match(TokenType.R_BRACKET, expression=this): 3839 self.raise_error("Expecting ]") 3840 else: 3841 this = self.expression(exp.In, this=this, field=self._parse_field()) 3842 3843 return this 3844 3845 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3846 low = self._parse_bitwise() 3847 self._match(TokenType.AND) 3848 high = self._parse_bitwise() 3849 return self.expression(exp.Between, this=this, low=low, high=high) 3850 3851 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3852 if not self._match(TokenType.ESCAPE): 3853 return this 3854 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3855 3856 def 
_parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3857 index = self._index 3858 3859 if not self._match(TokenType.INTERVAL) and match_interval: 3860 return None 3861 3862 if self._match(TokenType.STRING, advance=False): 3863 this = self._parse_primary() 3864 else: 3865 this = self._parse_term() 3866 3867 if not this or ( 3868 isinstance(this, exp.Column) 3869 and not this.table 3870 and not this.this.quoted 3871 and this.name.upper() == "IS" 3872 ): 3873 self._retreat(index) 3874 return None 3875 3876 unit = self._parse_function() or ( 3877 not self._match(TokenType.ALIAS, advance=False) 3878 and self._parse_var(any_token=True, upper=True) 3879 ) 3880 3881 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3882 # each INTERVAL expression into this canonical form so it's easy to transpile 3883 if this and this.is_number: 3884 this = exp.Literal.string(this.name) 3885 elif this and this.is_string: 3886 parts = this.name.split() 3887 3888 if len(parts) == 2: 3889 if unit: 3890 # This is not actually a unit, it's something else (e.g. a "window side") 3891 unit = None 3892 self._retreat(self._index - 1) 3893 3894 this = exp.Literal.string(parts[0]) 3895 unit = self.expression(exp.Var, this=parts[1].upper()) 3896 3897 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 3898 unit = self.expression( 3899 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 3900 ) 3901 3902 return self.expression(exp.Interval, this=this, unit=unit) 3903 3904 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 3905 this = self._parse_term() 3906 3907 while True: 3908 if self._match_set(self.BITWISE): 3909 this = self.expression( 3910 self.BITWISE[self._prev.token_type], 3911 this=this, 3912 expression=self._parse_term(), 3913 ) 3914 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 3915 this = self.expression( 3916 exp.DPipe, 3917 this=this, 3918 expression=self._parse_term(), 3919 safe=not self.dialect.STRICT_STRING_CONCAT, 3920 ) 3921 elif self._match(TokenType.DQMARK): 3922 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 3923 elif self._match_pair(TokenType.LT, TokenType.LT): 3924 this = self.expression( 3925 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 3926 ) 3927 elif self._match_pair(TokenType.GT, TokenType.GT): 3928 this = self.expression( 3929 exp.BitwiseRightShift, this=this, expression=self._parse_term() 3930 ) 3931 else: 3932 break 3933 3934 return this 3935 3936 def _parse_term(self) -> t.Optional[exp.Expression]: 3937 return self._parse_tokens(self._parse_factor, self.TERM) 3938 3939 def _parse_factor(self) -> t.Optional[exp.Expression]: 3940 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 3941 this = parse_method() 3942 3943 while self._match_set(self.FACTOR): 3944 this = self.expression( 3945 self.FACTOR[self._prev.token_type], 3946 this=this, 3947 comments=self._prev_comments, 3948 expression=parse_method(), 3949 ) 3950 if isinstance(this, exp.Div): 3951 this.args["typed"] = self.dialect.TYPED_DIVISION 3952 this.args["safe"] = self.dialect.SAFE_DIVISION 3953 3954 return this 3955 3956 def _parse_exponent(self) -> t.Optional[exp.Expression]: 3957 return self._parse_tokens(self._parse_unary, self.EXPONENT) 3958 3959 def _parse_unary(self) -> t.Optional[exp.Expression]: 3960 if self._match_set(self.UNARY_PARSERS): 3961 return self.UNARY_PARSERS[self._prev.token_type](self) 3962 return 
self._parse_at_time_zone(self._parse_type()) 3963 3964 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 3965 interval = parse_interval and self._parse_interval() 3966 if interval: 3967 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 3968 while True: 3969 index = self._index 3970 self._match(TokenType.PLUS) 3971 3972 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 3973 self._retreat(index) 3974 break 3975 3976 interval = self.expression( # type: ignore 3977 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 3978 ) 3979 3980 return interval 3981 3982 index = self._index 3983 data_type = self._parse_types(check_func=True, allow_identifiers=False) 3984 this = self._parse_column() 3985 3986 if data_type: 3987 if isinstance(this, exp.Literal): 3988 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 3989 if parser: 3990 return parser(self, this, data_type) 3991 return self.expression(exp.Cast, this=this, to=data_type) 3992 if not data_type.expressions: 3993 self._retreat(index) 3994 return self._parse_column() 3995 return self._parse_column_ops(data_type) 3996 3997 return this and self._parse_column_ops(this) 3998 3999 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4000 this = self._parse_type() 4001 if not this: 4002 return None 4003 4004 if isinstance(this, exp.Column) and not this.table: 4005 this = exp.var(this.name.upper()) 4006 4007 return self.expression( 4008 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4009 ) 4010 4011 def _parse_types( 4012 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4013 ) -> t.Optional[exp.Expression]: 4014 index = self._index 4015 4016 prefix = self._match_text_seq("SYSUDTLIB", ".") 4017 4018 if not self._match_set(self.TYPE_TOKENS): 4019 identifier = allow_identifiers and self._parse_id_var( 4020 any_token=False, tokens=(TokenType.VAR,) 4021 ) 4022 if identifier: 4023 tokens = self.dialect.tokenize(identifier.name) 4024 4025 if len(tokens) != 1: 4026 self.raise_error("Unexpected identifier", self._prev) 4027 4028 if tokens[0].token_type in self.TYPE_TOKENS: 4029 self._prev = tokens[0] 4030 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4031 type_name = identifier.name 4032 4033 while self._match(TokenType.DOT): 4034 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4035 4036 return exp.DataType.build(type_name, udt=True) 4037 else: 4038 self._retreat(self._index - 1) 4039 return None 4040 else: 4041 return None 4042 4043 type_token = self._prev.token_type 4044 4045 if type_token == TokenType.PSEUDO_TYPE: 4046 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4047 4048 if type_token == TokenType.OBJECT_IDENTIFIER: 4049 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4050 4051 nested = type_token in self.NESTED_TYPE_TOKENS 4052 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4053 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4054 expressions = None 4055 maybe_func = False 4056 4057 if self._match(TokenType.L_PAREN): 4058 if is_struct: 4059 expressions = self._parse_csv(self._parse_struct_types) 4060 elif nested: 4061 expressions = self._parse_csv( 4062 lambda: self._parse_types( 4063 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4064 ) 4065 ) 4066 elif type_token in self.ENUM_TYPE_TOKENS: 4067 expressions = self._parse_csv(self._parse_equality) 4068 elif 
is_aggregate: 4069 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4070 any_token=False, tokens=(TokenType.VAR,) 4071 ) 4072 if not func_or_ident or not self._match(TokenType.COMMA): 4073 return None 4074 expressions = self._parse_csv( 4075 lambda: self._parse_types( 4076 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4077 ) 4078 ) 4079 expressions.insert(0, func_or_ident) 4080 else: 4081 expressions = self._parse_csv(self._parse_type_size) 4082 4083 if not expressions or not self._match(TokenType.R_PAREN): 4084 self._retreat(index) 4085 return None 4086 4087 maybe_func = True 4088 4089 this: t.Optional[exp.Expression] = None 4090 values: t.Optional[t.List[exp.Expression]] = None 4091 4092 if nested and self._match(TokenType.LT): 4093 if is_struct: 4094 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4095 else: 4096 expressions = self._parse_csv( 4097 lambda: self._parse_types( 4098 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4099 ) 4100 ) 4101 4102 if not self._match(TokenType.GT): 4103 self.raise_error("Expecting >") 4104 4105 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4106 values = self._parse_csv(self._parse_conjunction) 4107 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4108 4109 if type_token in self.TIMESTAMPS: 4110 if self._match_text_seq("WITH", "TIME", "ZONE"): 4111 maybe_func = False 4112 tz_type = ( 4113 exp.DataType.Type.TIMETZ 4114 if type_token in self.TIMES 4115 else exp.DataType.Type.TIMESTAMPTZ 4116 ) 4117 this = exp.DataType(this=tz_type, expressions=expressions) 4118 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4119 maybe_func = False 4120 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4121 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4122 maybe_func = False 4123 elif type_token == TokenType.INTERVAL: 4124 unit = self._parse_var(upper=True) 4125 if unit: 4126 if self._match_text_seq("TO"): 4127 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4128 4129 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4130 else: 4131 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4132 4133 if maybe_func and check_func: 4134 index2 = self._index 4135 peek = self._parse_string() 4136 4137 if not peek: 4138 self._retreat(index) 4139 return None 4140 4141 self._retreat(index2) 4142 4143 if not this: 4144 if self._match_text_seq("UNSIGNED"): 4145 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4146 if not unsigned_type_token: 4147 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4148 4149 type_token = unsigned_type_token or type_token 4150 4151 this = exp.DataType( 4152 this=exp.DataType.Type[type_token.value], 4153 expressions=expressions, 4154 nested=nested, 4155 values=values, 4156 prefix=prefix, 4157 ) 4158 4159 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4160 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4161 4162 return this 4163 4164 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4165 index = self._index 4166 this = self._parse_type(parse_interval=False) or self._parse_id_var() 4167 self._match(TokenType.COLON) 4168 column_def = self._parse_column_def(this) 4169 4170 if type_required and ( 4171 (isinstance(this, exp.Column) and this.this is column_def) 
or this is column_def 4172 ): 4173 self._retreat(index) 4174 return self._parse_types() 4175 4176 return column_def 4177 4178 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4179 if not self._match_text_seq("AT", "TIME", "ZONE"): 4180 return this 4181 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4182 4183 def _parse_column(self) -> t.Optional[exp.Expression]: 4184 this = self._parse_column_reference() 4185 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4186 4187 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4188 this = self._parse_field() 4189 if ( 4190 not this 4191 and self._match(TokenType.VALUES, advance=False) 4192 and self.VALUES_FOLLOWED_BY_PAREN 4193 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4194 ): 4195 this = self._parse_id_var() 4196 4197 return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this 4198 4199 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4200 this = self._parse_bracket(this) 4201 4202 while self._match_set(self.COLUMN_OPERATORS): 4203 op_token = self._prev.token_type 4204 op = self.COLUMN_OPERATORS.get(op_token) 4205 4206 if op_token == TokenType.DCOLON: 4207 field = self._parse_types() 4208 if not field: 4209 self.raise_error("Expected type") 4210 elif op and self._curr: 4211 field = self._parse_column_reference() 4212 else: 4213 field = self._parse_field(anonymous_func=True, any_token=True) 4214 4215 if isinstance(field, exp.Func) and this: 4216 # bigquery allows function calls like x.y.count(...) 4217 # SAFE.SUBSTR(...) 4218 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4219 this = exp.replace_tree( 4220 this, 4221 lambda n: ( 4222 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4223 if n.table 4224 else n.this 4225 ) 4226 if isinstance(n, exp.Column) 4227 else n, 4228 ) 4229 4230 if op: 4231 this = op(self, this, field) 4232 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4233 this = self.expression( 4234 exp.Column, 4235 this=field, 4236 table=this.this, 4237 db=this.args.get("table"), 4238 catalog=this.args.get("db"), 4239 ) 4240 else: 4241 this = self.expression(exp.Dot, this=this, expression=field) 4242 this = self._parse_bracket(this) 4243 return this 4244 4245 def _parse_primary(self) -> t.Optional[exp.Expression]: 4246 if self._match_set(self.PRIMARY_PARSERS): 4247 token_type = self._prev.token_type 4248 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4249 4250 if token_type == TokenType.STRING: 4251 expressions = [primary] 4252 while self._match(TokenType.STRING): 4253 expressions.append(exp.Literal.string(self._prev.text)) 4254 4255 if len(expressions) > 1: 4256 return self.expression(exp.Concat, expressions=expressions) 4257 4258 return primary 4259 4260 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4261 return exp.Literal.number(f"0.{self._prev.text}") 4262 4263 if self._match(TokenType.L_PAREN): 4264 comments = self._prev_comments 4265 query = self._parse_select() 4266 4267 if query: 4268 expressions = [query] 4269 else: 4270 expressions = self._parse_expressions() 4271 4272 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4273 4274 if isinstance(this, exp.UNWRAPPED_QUERIES): 4275 this = self._parse_set_operations( 4276 self._parse_subquery(this=this, parse_alias=False) 4277 ) 4278 elif 
isinstance(this, exp.Subquery): 4279 this = self._parse_subquery( 4280 this=self._parse_set_operations(this), parse_alias=False 4281 ) 4282 elif len(expressions) > 1: 4283 this = self.expression(exp.Tuple, expressions=expressions) 4284 else: 4285 this = self.expression(exp.Paren, this=this) 4286 4287 if this: 4288 this.add_comments(comments) 4289 4290 self._match_r_paren(expression=this) 4291 return this 4292 4293 return None 4294 4295 def _parse_field( 4296 self, 4297 any_token: bool = False, 4298 tokens: t.Optional[t.Collection[TokenType]] = None, 4299 anonymous_func: bool = False, 4300 ) -> t.Optional[exp.Expression]: 4301 return ( 4302 self._parse_primary() 4303 or self._parse_function(anonymous=anonymous_func) 4304 or self._parse_id_var(any_token=any_token, tokens=tokens) 4305 ) 4306 4307 def _parse_function( 4308 self, 4309 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4310 anonymous: bool = False, 4311 optional_parens: bool = True, 4312 ) -> t.Optional[exp.Expression]: 4313 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4314 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4315 fn_syntax = False 4316 if ( 4317 self._match(TokenType.L_BRACE, advance=False) 4318 and self._next 4319 and self._next.text.upper() == "FN" 4320 ): 4321 self._advance(2) 4322 fn_syntax = True 4323 4324 func = self._parse_function_call( 4325 functions=functions, anonymous=anonymous, optional_parens=optional_parens 4326 ) 4327 4328 if fn_syntax: 4329 self._match(TokenType.R_BRACE) 4330 4331 return func 4332 4333 def _parse_function_call( 4334 self, 4335 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4336 anonymous: bool = False, 4337 optional_parens: bool = True, 4338 ) -> t.Optional[exp.Expression]: 4339 if not self._curr: 4340 return None 4341 4342 comments = self._curr.comments 4343 token_type = self._curr.token_type 4344 this = self._curr.text 4345 upper = this.upper() 4346 4347 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4348 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4349 self._advance() 4350 return self._parse_window(parser(self)) 4351 4352 if not self._next or self._next.token_type != TokenType.L_PAREN: 4353 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4354 self._advance() 4355 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4356 4357 return None 4358 4359 if token_type not in self.FUNC_TOKENS: 4360 return None 4361 4362 self._advance(2) 4363 4364 parser = self.FUNCTION_PARSERS.get(upper) 4365 if parser and not anonymous: 4366 this = parser(self) 4367 else: 4368 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4369 4370 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4371 this = self.expression(subquery_predicate, this=self._parse_select()) 4372 self._match_r_paren() 4373 return this 4374 4375 if functions is None: 4376 functions = self.FUNCTIONS 4377 4378 function = functions.get(upper) 4379 4380 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4381 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4382 4383 if alias: 4384 args = self._kv_to_prop_eq(args) 4385 4386 if function and not anonymous: 4387 if "dialect" in function.__code__.co_varnames: 4388 func = function(args, dialect=self.dialect) 4389 else: 4390 func = function(args) 4391 4392 func = self.validate_expression(func, args) 4393 if not self.dialect.NORMALIZE_FUNCTIONS: 4394 func.meta["name"] = this 4395 4396 this = func 4397 
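# At this point `this` is a validated function expression produced by a registered builder
# (e.g. build_logarithm for LOG). Names without a builder fall through to the `else` branch
# below and parse as exp.Anonymous. Illustrative sketch, assuming a standard sqlglot install;
# MY_UDF is a hypothetical, unregistered function name:
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> sqlglot.parse_one("SELECT LOG(2, 1024)").find(exp.Log) is not None
#   # expected: True
#   >>> sqlglot.parse_one("SELECT MY_UDF(1, 2)").find(exp.Anonymous) is not None
#   # expected: True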
else: 4398 if token_type == TokenType.IDENTIFIER: 4399 this = exp.Identifier(this=this, quoted=True) 4400 this = self.expression(exp.Anonymous, this=this, expressions=args) 4401 4402 if isinstance(this, exp.Expression): 4403 this.add_comments(comments) 4404 4405 self._match_r_paren(this) 4406 return self._parse_window(this) 4407 4408 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4409 transformed = [] 4410 4411 for e in expressions: 4412 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4413 if isinstance(e, exp.Alias): 4414 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4415 4416 if not isinstance(e, exp.PropertyEQ): 4417 e = self.expression( 4418 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4419 ) 4420 4421 if isinstance(e.this, exp.Column): 4422 e.this.replace(e.this.this) 4423 4424 transformed.append(e) 4425 4426 return transformed 4427 4428 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4429 return self._parse_column_def(self._parse_id_var()) 4430 4431 def _parse_user_defined_function( 4432 self, kind: t.Optional[TokenType] = None 4433 ) -> t.Optional[exp.Expression]: 4434 this = self._parse_id_var() 4435 4436 while self._match(TokenType.DOT): 4437 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4438 4439 if not self._match(TokenType.L_PAREN): 4440 return this 4441 4442 expressions = self._parse_csv(self._parse_function_parameter) 4443 self._match_r_paren() 4444 return self.expression( 4445 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4446 ) 4447 4448 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4449 literal = self._parse_primary() 4450 if literal: 4451 return self.expression(exp.Introducer, this=token.text, expression=literal) 4452 4453 return self.expression(exp.Identifier, this=token.text) 4454 4455 def _parse_session_parameter(self) -> exp.SessionParameter: 4456 kind = None 4457 this = self._parse_id_var() or self._parse_primary() 4458 4459 if this and self._match(TokenType.DOT): 4460 kind = this.name 4461 this = self._parse_var() or self._parse_primary() 4462 4463 return self.expression(exp.SessionParameter, this=this, kind=kind) 4464 4465 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4466 index = self._index 4467 4468 if self._match(TokenType.L_PAREN): 4469 expressions = t.cast( 4470 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4471 ) 4472 4473 if not self._match(TokenType.R_PAREN): 4474 self._retreat(index) 4475 else: 4476 expressions = [self._parse_id_var()] 4477 4478 if self._match_set(self.LAMBDAS): 4479 return self.LAMBDAS[self._prev.token_type](self, expressions) 4480 4481 self._retreat(index) 4482 4483 this: t.Optional[exp.Expression] 4484 4485 if self._match(TokenType.DISTINCT): 4486 this = self.expression( 4487 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4488 ) 4489 else: 4490 this = self._parse_select_or_expression(alias=alias) 4491 4492 return self._parse_limit( 4493 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4494 ) 4495 4496 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4497 index = self._index 4498 4499 if not self._match(TokenType.L_PAREN): 4500 return this 4501 4502 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 4503 # expr can be of both types 4504 if self._match_set(self.SELECT_START_TOKENS): 4505 self._retreat(index) 4506 return this 4507 4508 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4509 4510 self._match_r_paren() 4511 return self.expression(exp.Schema, this=this, expressions=args) 4512 4513 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4514 return self._parse_column_def(self._parse_field(any_token=True)) 4515 4516 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4517 # column defs are not really columns, they're identifiers 4518 if isinstance(this, exp.Column): 4519 this = this.this 4520 4521 kind = self._parse_types(schema=True) 4522 4523 if self._match_text_seq("FOR", "ORDINALITY"): 4524 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4525 4526 constraints: t.List[exp.Expression] = [] 4527 4528 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4529 ("ALIAS", "MATERIALIZED") 4530 ): 4531 persisted = self._prev.text.upper() == "MATERIALIZED" 4532 constraints.append( 4533 self.expression( 4534 exp.ComputedColumnConstraint, 4535 this=self._parse_conjunction(), 4536 persisted=persisted or self._match_text_seq("PERSISTED"), 4537 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4538 ) 4539 ) 4540 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4541 self._match(TokenType.ALIAS) 4542 constraints.append( 4543 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4544 ) 4545 4546 while True: 4547 constraint = self._parse_column_constraint() 4548 if not constraint: 4549 break 4550 constraints.append(constraint) 4551 4552 if not kind and not constraints: 4553 return this 4554 4555 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4556 4557 def _parse_auto_increment( 4558 self, 4559 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4560 start = None 4561 increment = None 4562 4563 if self._match(TokenType.L_PAREN, advance=False): 4564 args = self._parse_wrapped_csv(self._parse_bitwise) 4565 start = seq_get(args, 0) 4566 increment = seq_get(args, 1) 4567 elif self._match_text_seq("START"): 4568 start = self._parse_bitwise() 4569 self._match_text_seq("INCREMENT") 4570 increment = self._parse_bitwise() 4571 4572 if start and increment: 4573 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4574 4575 return exp.AutoIncrementColumnConstraint() 4576 4577 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4578 if not self._match_text_seq("REFRESH"): 4579 self._retreat(self._index - 1) 4580 return None 4581 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4582 4583 def _parse_compress(self) -> exp.CompressColumnConstraint: 4584 if self._match(TokenType.L_PAREN, advance=False): 4585 return self.expression( 4586 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4587 ) 4588 4589 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4590 4591 def _parse_generated_as_identity( 4592 self, 4593 ) -> ( 4594 exp.GeneratedAsIdentityColumnConstraint 4595 | exp.ComputedColumnConstraint 4596 | exp.GeneratedAsRowColumnConstraint 4597 ): 4598 if self._match_text_seq("BY", "DEFAULT"): 4599 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4600 this = self.expression( 4601 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4602 ) 4603 else: 4604 self._match_text_seq("ALWAYS") 4605 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4606 4607 self._match(TokenType.ALIAS) 4608 4609 if self._match_text_seq("ROW"): 4610 start = self._match_text_seq("START") 4611 if not start: 4612 self._match(TokenType.END) 4613 hidden = self._match_text_seq("HIDDEN") 4614 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4615 4616 identity = self._match_text_seq("IDENTITY") 4617 4618 if self._match(TokenType.L_PAREN): 4619 if self._match(TokenType.START_WITH): 4620 this.set("start", self._parse_bitwise()) 4621 if self._match_text_seq("INCREMENT", "BY"): 4622 this.set("increment", self._parse_bitwise()) 4623 if self._match_text_seq("MINVALUE"): 4624 this.set("minvalue", self._parse_bitwise()) 4625 if self._match_text_seq("MAXVALUE"): 4626 this.set("maxvalue", self._parse_bitwise()) 4627 4628 if self._match_text_seq("CYCLE"): 4629 this.set("cycle", True) 4630 elif self._match_text_seq("NO", "CYCLE"): 4631 this.set("cycle", False) 4632 4633 if not identity: 4634 this.set("expression", self._parse_bitwise()) 4635 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4636 args = self._parse_csv(self._parse_bitwise) 4637 this.set("start", seq_get(args, 0)) 4638 this.set("increment", seq_get(args, 1)) 4639 4640 self._match_r_paren() 4641 4642 return this 4643 4644 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4645 self._match_text_seq("LENGTH") 4646 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4647 4648 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4649 if self._match_text_seq("NULL"): 4650 return self.expression(exp.NotNullColumnConstraint) 4651 if self._match_text_seq("CASESPECIFIC"): 4652 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4653 if self._match_text_seq("FOR", "REPLICATION"): 4654 return self.expression(exp.NotForReplicationColumnConstraint) 4655 return None 4656 4657 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4658 if self._match(TokenType.CONSTRAINT): 4659 this = self._parse_id_var() 4660 else: 4661 this = None 4662 4663 if self._match_texts(self.CONSTRAINT_PARSERS): 4664 return self.expression( 4665 exp.ColumnConstraint, 4666 this=this, 4667 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4668 ) 4669 4670 return this 4671 4672 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4673 if not self._match(TokenType.CONSTRAINT): 4674 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4675 4676 return self.expression( 4677 exp.Constraint, 4678 this=self._parse_id_var(), 4679 expressions=self._parse_unnamed_constraints(), 4680 ) 4681 4682 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4683 constraints = [] 4684 while True: 4685 constraint = self._parse_unnamed_constraint() or self._parse_function() 4686 if not constraint: 4687 break 4688 constraints.append(constraint) 4689 4690 return constraints 4691 4692 def _parse_unnamed_constraint( 4693 self, constraints: t.Optional[t.Collection[str]] = None 4694 ) -> t.Optional[exp.Expression]: 4695 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4696 constraints or self.CONSTRAINT_PARSERS 4697 ): 4698 return None 4699 4700 constraint = self._prev.text.upper() 4701 if constraint not in self.CONSTRAINT_PARSERS: 4702 
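# Illustrative sketch of the column-constraint machinery above, assuming a standard sqlglot
# install: each column definition collects its parsed constraints as exp.ColumnConstraint
# nodes under the "constraints" arg.
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> ast = sqlglot.parse_one("CREATE TABLE t (id INT NOT NULL, x TEXT DEFAULT 'a')")
#   >>> for col in ast.find_all(exp.ColumnDef):
#   ...     print(col.name, [c.sql() for c in col.args.get("constraints", [])])
#   # expected (roughly): id ['NOT NULL'] then x ["DEFAULT 'a'"]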
self.raise_error(f"No parser found for schema constraint {constraint}.") 4703 4704 return self.CONSTRAINT_PARSERS[constraint](self) 4705 4706 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4707 self._match_text_seq("KEY") 4708 return self.expression( 4709 exp.UniqueColumnConstraint, 4710 this=self._parse_schema(self._parse_id_var(any_token=False)), 4711 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4712 on_conflict=self._parse_on_conflict(), 4713 ) 4714 4715 def _parse_key_constraint_options(self) -> t.List[str]: 4716 options = [] 4717 while True: 4718 if not self._curr: 4719 break 4720 4721 if self._match(TokenType.ON): 4722 action = None 4723 on = self._advance_any() and self._prev.text 4724 4725 if self._match_text_seq("NO", "ACTION"): 4726 action = "NO ACTION" 4727 elif self._match_text_seq("CASCADE"): 4728 action = "CASCADE" 4729 elif self._match_text_seq("RESTRICT"): 4730 action = "RESTRICT" 4731 elif self._match_pair(TokenType.SET, TokenType.NULL): 4732 action = "SET NULL" 4733 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4734 action = "SET DEFAULT" 4735 else: 4736 self.raise_error("Invalid key constraint") 4737 4738 options.append(f"ON {on} {action}") 4739 elif self._match_text_seq("NOT", "ENFORCED"): 4740 options.append("NOT ENFORCED") 4741 elif self._match_text_seq("DEFERRABLE"): 4742 options.append("DEFERRABLE") 4743 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4744 options.append("INITIALLY DEFERRED") 4745 elif self._match_text_seq("NORELY"): 4746 options.append("NORELY") 4747 elif self._match_text_seq("MATCH", "FULL"): 4748 options.append("MATCH FULL") 4749 else: 4750 break 4751 4752 return options 4753 4754 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4755 if match and not self._match(TokenType.REFERENCES): 4756 return None 4757 4758 expressions = None 4759 this = self._parse_table(schema=True) 4760 options = self._parse_key_constraint_options() 4761 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4762 4763 def _parse_foreign_key(self) -> exp.ForeignKey: 4764 expressions = self._parse_wrapped_id_vars() 4765 reference = self._parse_references() 4766 options = {} 4767 4768 while self._match(TokenType.ON): 4769 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4770 self.raise_error("Expected DELETE or UPDATE") 4771 4772 kind = self._prev.text.lower() 4773 4774 if self._match_text_seq("NO", "ACTION"): 4775 action = "NO ACTION" 4776 elif self._match(TokenType.SET): 4777 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4778 action = "SET " + self._prev.text.upper() 4779 else: 4780 self._advance() 4781 action = self._prev.text.upper() 4782 4783 options[kind] = action 4784 4785 return self.expression( 4786 exp.ForeignKey, 4787 expressions=expressions, 4788 reference=reference, 4789 **options, # type: ignore 4790 ) 4791 4792 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4793 return self._parse_field() 4794 4795 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4796 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4797 self._retreat(self._index - 1) 4798 return None 4799 4800 id_vars = self._parse_wrapped_id_vars() 4801 return self.expression( 4802 exp.PeriodForSystemTimeConstraint, 4803 this=seq_get(id_vars, 0), 4804 expression=seq_get(id_vars, 1), 4805 ) 4806 4807 def _parse_primary_key( 4808 self, wrapped_optional: bool = False, in_props: bool = False 4809 ) -> 
exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4810 desc = ( 4811 self._match_set((TokenType.ASC, TokenType.DESC)) 4812 and self._prev.token_type == TokenType.DESC 4813 ) 4814 4815 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4816 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4817 4818 expressions = self._parse_wrapped_csv( 4819 self._parse_primary_key_part, optional=wrapped_optional 4820 ) 4821 options = self._parse_key_constraint_options() 4822 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4823 4824 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4825 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4826 4827 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4828 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4829 return this 4830 4831 bracket_kind = self._prev.token_type 4832 expressions = self._parse_csv( 4833 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4834 ) 4835 4836 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 4837 self.raise_error("Expected ]") 4838 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 4839 self.raise_error("Expected }") 4840 4841 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4842 if bracket_kind == TokenType.L_BRACE: 4843 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 4844 elif not this or this.name.upper() == "ARRAY": 4845 this = self.expression(exp.Array, expressions=expressions) 4846 else: 4847 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4848 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4849 4850 self._add_comments(this) 4851 return self._parse_bracket(this) 4852 4853 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4854 if self._match(TokenType.COLON): 4855 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4856 return this 4857 4858 def _parse_case(self) -> t.Optional[exp.Expression]: 4859 ifs = [] 4860 default = None 4861 4862 comments = self._prev_comments 4863 expression = self._parse_conjunction() 4864 4865 while self._match(TokenType.WHEN): 4866 this = self._parse_conjunction() 4867 self._match(TokenType.THEN) 4868 then = self._parse_conjunction() 4869 ifs.append(self.expression(exp.If, this=this, true=then)) 4870 4871 if self._match(TokenType.ELSE): 4872 default = self._parse_conjunction() 4873 4874 if not self._match(TokenType.END): 4875 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4876 default = exp.column("interval") 4877 else: 4878 self.raise_error("Expected END after CASE", self._prev) 4879 4880 return self.expression( 4881 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 4882 ) 4883 4884 def _parse_if(self) -> t.Optional[exp.Expression]: 4885 if self._match(TokenType.L_PAREN): 4886 args = self._parse_csv(self._parse_conjunction) 4887 this = self.validate_expression(exp.If.from_arg_list(args), args) 4888 self._match_r_paren() 4889 else: 4890 index = self._index - 1 4891 4892 if self.NO_PAREN_IF_COMMANDS and index == 0: 4893 return self._parse_as_command(self._prev) 4894 4895 condition = self._parse_conjunction() 4896 4897 if not condition: 4898 self._retreat(index) 4899 return None 4900 
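# Illustrative sketch of what _parse_case (above) produces for a searched CASE expression,
# assuming a standard sqlglot install: one exp.If per WHEN/THEN pair, with the ELSE branch
# stored as `default`.
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> case = sqlglot.parse_one("SELECT CASE WHEN x = 1 THEN 'a' ELSE 'b' END").find(exp.Case)
#   >>> len(case.args["ifs"])        # expected: 1
#   >>> case.args["default"].sql()   # expected: "'b'"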
4901 self._match(TokenType.THEN) 4902 true = self._parse_conjunction() 4903 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4904 self._match(TokenType.END) 4905 this = self.expression(exp.If, this=condition, true=true, false=false) 4906 4907 return this 4908 4909 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4910 if not self._match_text_seq("VALUE", "FOR"): 4911 self._retreat(self._index - 1) 4912 return None 4913 4914 return self.expression( 4915 exp.NextValueFor, 4916 this=self._parse_column(), 4917 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4918 ) 4919 4920 def _parse_extract(self) -> exp.Extract: 4921 this = self._parse_function() or self._parse_var() or self._parse_type() 4922 4923 if self._match(TokenType.FROM): 4924 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4925 4926 if not self._match(TokenType.COMMA): 4927 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4928 4929 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4930 4931 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4932 this = self._parse_conjunction() 4933 4934 if not self._match(TokenType.ALIAS): 4935 if self._match(TokenType.COMMA): 4936 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4937 4938 self.raise_error("Expected AS after CAST") 4939 4940 fmt = None 4941 to = self._parse_types() 4942 4943 if self._match(TokenType.FORMAT): 4944 fmt_string = self._parse_string() 4945 fmt = self._parse_at_time_zone(fmt_string) 4946 4947 if not to: 4948 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4949 if to.this in exp.DataType.TEMPORAL_TYPES: 4950 this = self.expression( 4951 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4952 this=this, 4953 format=exp.Literal.string( 4954 format_time( 4955 fmt_string.this if fmt_string else "", 4956 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4957 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4958 ) 4959 ), 4960 ) 4961 4962 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4963 this.set("zone", fmt.args["zone"]) 4964 return this 4965 elif not to: 4966 self.raise_error("Expected TYPE after CAST") 4967 elif isinstance(to, exp.Identifier): 4968 to = exp.DataType.build(to.name, udt=True) 4969 elif to.this == exp.DataType.Type.CHAR: 4970 if self._match(TokenType.CHARACTER_SET): 4971 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4972 4973 return self.expression( 4974 exp.Cast if strict else exp.TryCast, 4975 this=this, 4976 to=to, 4977 format=fmt, 4978 safe=safe, 4979 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 4980 ) 4981 4982 def _parse_string_agg(self) -> exp.Expression: 4983 if self._match(TokenType.DISTINCT): 4984 args: t.List[t.Optional[exp.Expression]] = [ 4985 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4986 ] 4987 if self._match(TokenType.COMMA): 4988 args.extend(self._parse_csv(self._parse_conjunction)) 4989 else: 4990 args = self._parse_csv(self._parse_conjunction) # type: ignore 4991 4992 index = self._index 4993 if not self._match(TokenType.R_PAREN) and args: 4994 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4995 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... 
]] [LIMIT n]) 4996 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4997 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4998 4999 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5000 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5001 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5002 if not self._match_text_seq("WITHIN", "GROUP"): 5003 self._retreat(index) 5004 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5005 5006 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5007 order = self._parse_order(this=seq_get(args, 0)) 5008 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5009 5010 def _parse_convert( 5011 self, strict: bool, safe: t.Optional[bool] = None 5012 ) -> t.Optional[exp.Expression]: 5013 this = self._parse_bitwise() 5014 5015 if self._match(TokenType.USING): 5016 to: t.Optional[exp.Expression] = self.expression( 5017 exp.CharacterSet, this=self._parse_var() 5018 ) 5019 elif self._match(TokenType.COMMA): 5020 to = self._parse_types() 5021 else: 5022 to = None 5023 5024 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5025 5026 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5027 """ 5028 There are generally two variants of the DECODE function: 5029 5030 - DECODE(bin, charset) 5031 - DECODE(expression, search, result [, search, result] ... [, default]) 5032 5033 The second variant will always be parsed into a CASE expression. Note that NULL 5034 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5035 instead of relying on pattern matching. 
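For example (an illustrative sketch), DECODE(x, 1, 'one', 'other') is parsed as CASE WHEN x = 1 THEN 'one' ELSE 'other' END: the trailing argument becomes the CASE default because the search/result list has odd length, and a NULL search value produces a WHEN x IS NULL branch rather than an equality check.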
5036 """ 5037 args = self._parse_csv(self._parse_conjunction) 5038 5039 if len(args) < 3: 5040 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5041 5042 expression, *expressions = args 5043 if not expression: 5044 return None 5045 5046 ifs = [] 5047 for search, result in zip(expressions[::2], expressions[1::2]): 5048 if not search or not result: 5049 return None 5050 5051 if isinstance(search, exp.Literal): 5052 ifs.append( 5053 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5054 ) 5055 elif isinstance(search, exp.Null): 5056 ifs.append( 5057 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5058 ) 5059 else: 5060 cond = exp.or_( 5061 exp.EQ(this=expression.copy(), expression=search), 5062 exp.and_( 5063 exp.Is(this=expression.copy(), expression=exp.Null()), 5064 exp.Is(this=search.copy(), expression=exp.Null()), 5065 copy=False, 5066 ), 5067 copy=False, 5068 ) 5069 ifs.append(exp.If(this=cond, true=result)) 5070 5071 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5072 5073 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5074 self._match_text_seq("KEY") 5075 key = self._parse_column() 5076 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5077 self._match_text_seq("VALUE") 5078 value = self._parse_bitwise() 5079 5080 if not key and not value: 5081 return None 5082 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5083 5084 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5085 if not this or not self._match_text_seq("FORMAT", "JSON"): 5086 return this 5087 5088 return self.expression(exp.FormatJson, this=this) 5089 5090 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5091 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5092 for value in values: 5093 if self._match_text_seq(value, "ON", on): 5094 return f"{value} ON {on}" 5095 5096 return None 5097 5098 @t.overload 5099 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5100 5101 @t.overload 5102 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
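# Illustrative sketch for _parse_json_object below, assuming a standard sqlglot install; the
# ON NULL clause is captured as a plain string by _parse_on_handling (defined above):
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> node = sqlglot.parse_one(
#   ...     "SELECT JSON_OBJECT('a' VALUE 1 ABSENT ON NULL)", read="oracle"
#   ... ).find(exp.JSONObject)
#   >>> node.args.get("null_handling")  # expected: 'ABSENT ON NULL'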
5103 5104 def _parse_json_object(self, agg=False): 5105 star = self._parse_star() 5106 expressions = ( 5107 [star] 5108 if star 5109 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5110 ) 5111 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5112 5113 unique_keys = None 5114 if self._match_text_seq("WITH", "UNIQUE"): 5115 unique_keys = True 5116 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5117 unique_keys = False 5118 5119 self._match_text_seq("KEYS") 5120 5121 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5122 self._parse_type() 5123 ) 5124 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5125 5126 return self.expression( 5127 exp.JSONObjectAgg if agg else exp.JSONObject, 5128 expressions=expressions, 5129 null_handling=null_handling, 5130 unique_keys=unique_keys, 5131 return_type=return_type, 5132 encoding=encoding, 5133 ) 5134 5135 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5136 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5137 if not self._match_text_seq("NESTED"): 5138 this = self._parse_id_var() 5139 kind = self._parse_types(allow_identifiers=False) 5140 nested = None 5141 else: 5142 this = None 5143 kind = None 5144 nested = True 5145 5146 path = self._match_text_seq("PATH") and self._parse_string() 5147 nested_schema = nested and self._parse_json_schema() 5148 5149 return self.expression( 5150 exp.JSONColumnDef, 5151 this=this, 5152 kind=kind, 5153 path=path, 5154 nested_schema=nested_schema, 5155 ) 5156 5157 def _parse_json_schema(self) -> exp.JSONSchema: 5158 self._match_text_seq("COLUMNS") 5159 return self.expression( 5160 exp.JSONSchema, 5161 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5162 ) 5163 5164 def _parse_json_table(self) -> exp.JSONTable: 5165 this = self._parse_format_json(self._parse_bitwise()) 5166 path = self._match(TokenType.COMMA) and self._parse_string() 5167 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5168 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5169 schema = self._parse_json_schema() 5170 5171 return exp.JSONTable( 5172 this=this, 5173 schema=schema, 5174 path=path, 5175 error_handling=error_handling, 5176 empty_handling=empty_handling, 5177 ) 5178 5179 def _parse_match_against(self) -> exp.MatchAgainst: 5180 expressions = self._parse_csv(self._parse_column) 5181 5182 self._match_text_seq(")", "AGAINST", "(") 5183 5184 this = self._parse_string() 5185 5186 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5187 modifier = "IN NATURAL LANGUAGE MODE" 5188 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5189 modifier = f"{modifier} WITH QUERY EXPANSION" 5190 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5191 modifier = "IN BOOLEAN MODE" 5192 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5193 modifier = "WITH QUERY EXPANSION" 5194 else: 5195 modifier = None 5196 5197 return self.expression( 5198 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5199 ) 5200 5201 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5202 def _parse_open_json(self) -> exp.OpenJSON: 5203 this = self._parse_bitwise() 5204 path = self._match(TokenType.COMMA) and self._parse_string() 5205 5206 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5207 this = self._parse_field(any_token=True) 5208 kind = self._parse_types() 5209 path = 
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
self._match_text_seq("RESPECT", "NULLS"): 5311 return self.expression(exp.RespectNulls, this=this) 5312 return this 5313 5314 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5315 if self._match(TokenType.HAVING): 5316 self._match_texts(("MAX", "MIN")) 5317 max = self._prev.text.upper() != "MIN" 5318 return self.expression( 5319 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5320 ) 5321 5322 return this 5323 5324 def _parse_window( 5325 self, this: t.Optional[exp.Expression], alias: bool = False 5326 ) -> t.Optional[exp.Expression]: 5327 func = this 5328 comments = func.comments if isinstance(func, exp.Expression) else None 5329 5330 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5331 self._match(TokenType.WHERE) 5332 this = self.expression( 5333 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5334 ) 5335 self._match_r_paren() 5336 5337 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5338 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5339 if self._match_text_seq("WITHIN", "GROUP"): 5340 order = self._parse_wrapped(self._parse_order) 5341 this = self.expression(exp.WithinGroup, this=this, expression=order) 5342 5343 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5344 # Some dialects choose to implement and some do not. 5345 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5346 5347 # There is some code above in _parse_lambda that handles 5348 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5349 5350 # The below changes handle 5351 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5352 5353 # Oracle allows both formats 5354 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5355 # and Snowflake chose to do the same for familiarity 5356 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5357 if isinstance(this, exp.AggFunc): 5358 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5359 5360 if ignore_respect and ignore_respect is not this: 5361 ignore_respect.replace(ignore_respect.this) 5362 this = self.expression(ignore_respect.__class__, this=this) 5363 5364 this = self._parse_respect_or_ignore_nulls(this) 5365 5366 # bigquery select from window x AS (partition by ...) 
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments:
            func.comments = None  # type: ignore

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.comments
                column.comments = None

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        identifier = self._parse_identifier()

        if identifier:
            return identifier

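        # Fall back to consuming the next token (e.g. a non-reserved keyword) as an identifier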
        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        self._match(TokenType.L_BRACE)
        this = self._parse_identifier() or self._parse_primary_or_var()
        expression = self._match(TokenType.COLON) and (
            self._parse_identifier() or self._parse_primary_or_var()
        )
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, expression=expression)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[exp.Expression]]:
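        # Column exclusion list, as in SELECT * EXCEPT (a, b) (BigQuery, Snowflake, DuckDB)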
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)

        except_column = self._parse_column()
        return [except_column] if except_column else None

    def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        replace_expression = self._parse_expression()
        return [replace_expression] if replace_expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

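        # COMMIT | ROLLBACK [WORK | TRANSACTION] [TO [SAVEPOINT] name] [AND [NO] CHAIN]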
self._match_text_seq("TO"): 5687 self._match_text_seq("SAVEPOINT") 5688 savepoint = self._parse_id_var() 5689 5690 if self._match(TokenType.AND): 5691 chain = not self._match_text_seq("NO") 5692 self._match_text_seq("CHAIN") 5693 5694 if is_rollback: 5695 return self.expression(exp.Rollback, savepoint=savepoint) 5696 5697 return self.expression(exp.Commit, chain=chain) 5698 5699 def _parse_refresh(self) -> exp.Refresh: 5700 self._match(TokenType.TABLE) 5701 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5702 5703 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5704 if not self._match_text_seq("ADD"): 5705 return None 5706 5707 self._match(TokenType.COLUMN) 5708 exists_column = self._parse_exists(not_=True) 5709 expression = self._parse_field_def() 5710 5711 if expression: 5712 expression.set("exists", exists_column) 5713 5714 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5715 if self._match_texts(("FIRST", "AFTER")): 5716 position = self._prev.text 5717 column_position = self.expression( 5718 exp.ColumnPosition, this=self._parse_column(), position=position 5719 ) 5720 expression.set("position", column_position) 5721 5722 return expression 5723 5724 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5725 drop = self._match(TokenType.DROP) and self._parse_drop() 5726 if drop and not isinstance(drop, exp.Command): 5727 drop.set("kind", drop.args.get("kind", "COLUMN")) 5728 return drop 5729 5730 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5731 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5732 return self.expression( 5733 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5734 ) 5735 5736 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5737 index = self._index - 1 5738 5739 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5740 return self._parse_csv( 5741 lambda: self.expression( 5742 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5743 ) 5744 ) 5745 5746 self._retreat(index) 5747 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5748 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5749 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5750 5751 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5752 self._match(TokenType.COLUMN) 5753 column = self._parse_field(any_token=True) 5754 5755 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5756 return self.expression(exp.AlterColumn, this=column, drop=True) 5757 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5758 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5759 if self._match(TokenType.COMMENT): 5760 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5761 5762 self._match_text_seq("SET", "DATA") 5763 self._match_text_seq("TYPE") 5764 return self.expression( 5765 exp.AlterColumn, 5766 this=column, 5767 dtype=self._parse_types(), 5768 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5769 using=self._match(TokenType.USING) and self._parse_conjunction(), 5770 ) 5771 5772 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5773 index = self._index - 1 5774 5775 partition_exists = self._parse_exists() 5776 if self._match(TokenType.PARTITION, advance=False): 5777 return self._parse_csv(lambda: 
        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

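    # Parses ClickHouse dictionary ranges, e.g. LIFETIME(MIN 0 MAX 300); when only one
    # value is given, MIN defaults to 0.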
_parse_dict_range(self, this: str) -> exp.DictRange: 6016 self._match_l_paren() 6017 has_min = self._match_text_seq("MIN") 6018 if has_min: 6019 min = self._parse_var() or self._parse_primary() 6020 self._match_text_seq("MAX") 6021 max = self._parse_var() or self._parse_primary() 6022 else: 6023 max = self._parse_var() or self._parse_primary() 6024 min = exp.Literal.number(0) 6025 self._match_r_paren() 6026 return self.expression(exp.DictRange, this=this, min=min, max=max) 6027 6028 def _parse_comprehension( 6029 self, this: t.Optional[exp.Expression] 6030 ) -> t.Optional[exp.Comprehension]: 6031 index = self._index 6032 expression = self._parse_column() 6033 if not self._match(TokenType.IN): 6034 self._retreat(index - 1) 6035 return None 6036 iterator = self._parse_column() 6037 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6038 return self.expression( 6039 exp.Comprehension, 6040 this=this, 6041 expression=expression, 6042 iterator=iterator, 6043 condition=condition, 6044 ) 6045 6046 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6047 if self._match(TokenType.HEREDOC_STRING): 6048 return self.expression(exp.Heredoc, this=self._prev.text) 6049 6050 if not self._match_text_seq("$"): 6051 return None 6052 6053 tags = ["$"] 6054 tag_text = None 6055 6056 if self._is_connected(): 6057 self._advance() 6058 tags.append(self._prev.text.upper()) 6059 else: 6060 self.raise_error("No closing $ found") 6061 6062 if tags[-1] != "$": 6063 if self._is_connected() and self._match_text_seq("$"): 6064 tag_text = tags[-1] 6065 tags.append("$") 6066 else: 6067 self.raise_error("No closing $ found") 6068 6069 heredoc_start = self._curr 6070 6071 while self._curr: 6072 if self._match_text_seq(*tags, advance=False): 6073 this = self._find_sql(heredoc_start, self._prev) 6074 self._advance(len(tags)) 6075 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6076 6077 self._advance() 6078 6079 self.raise_error(f"No closing {''.join(tags)} found") 6080 return None 6081 6082 def _find_parser( 6083 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6084 ) -> t.Optional[t.Callable]: 6085 if not self._curr: 6086 return None 6087 6088 index = self._index 6089 this = [] 6090 while True: 6091 # The current token might be multiple words 6092 curr = self._curr.text.upper() 6093 key = curr.split(" ") 6094 this.append(curr) 6095 6096 self._advance() 6097 result, trie = in_trie(trie, key) 6098 if result == TrieResult.FAILED: 6099 break 6100 6101 if result == TrieResult.EXISTS: 6102 subparser = parsers[" ".join(this)] 6103 return subparser 6104 6105 self._retreat(index) 6106 return None 6107 6108 def _match(self, token_type, advance=True, expression=None): 6109 if not self._curr: 6110 return None 6111 6112 if self._curr.token_type == token_type: 6113 if advance: 6114 self._advance() 6115 self._add_comments(expression) 6116 return True 6117 6118 return None 6119 6120 def _match_set(self, types, advance=True): 6121 if not self._curr: 6122 return None 6123 6124 if self._curr.token_type in types: 6125 if advance: 6126 self._advance() 6127 return True 6128 6129 return None 6130 6131 def _match_pair(self, token_type_a, token_type_b, advance=True): 6132 if not self._curr or not self._next: 6133 return None 6134 6135 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6136 if advance: 6137 self._advance(2) 6138 return True 6139 6140 return None 6141 6142 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6143 if not 
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # ClickHouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback: any unconsumed trailing tokens mean the statement couldn't be
        # fully parsed, so preserve it as a raw command
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)
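
# Illustrative usage (a sketch, not part of this module): the parser is usually
# driven through sqlglot's top-level helpers rather than instantiated directly.
#
#     import sqlglot
#
#     # Tokenize and parse into a syntax tree (an exp.Expression), then regenerate SQL.
#     tree = sqlglot.parse_one("SELECT a FROM t WHERE b > 1", read="duckdb")
#     print(tree.sql(dialect="snowflake"))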
26def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 27 if len(args) == 1 and args[0].is_star: 28 return exp.StarMap(this=args[0]) 29 30 keys = [] 31 values = [] 32 for i in range(0, len(args), 2): 33 keys.append(args[i]) 34 values.append(args[i + 1]) 35 36 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
52def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 53 # Default argument order is base, expression 54 this = seq_get(args, 0) 55 expression = seq_get(args, 1) 56 57 if expression: 58 if not dialect.LOG_BASE_FIRST: 59 this, expression = expression, this 60 return exp.Log(this=this, expression=expression) 61 62 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
65def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 66 def _builder(args: t.List, dialect: Dialect) -> E: 67 expression = expr_type( 68 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 69 ) 70 if len(args) > 2 and expr_type is exp.JSONExtract: 71 expression.set("expressions", args[2:]) 72 73 return expression 74 75 return _builder
88class Parser(metaclass=_Parser): 89 """ 90 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 91 92 Args: 93 error_level: The desired error level. 94 Default: ErrorLevel.IMMEDIATE 95 error_message_context: The amount of context to capture from a query string when displaying 96 the error message (in number of characters). 97 Default: 100 98 max_errors: Maximum number of error messages to include in a raised ParseError. 99 This is only relevant if error_level is ErrorLevel.RAISE. 100 Default: 3 101 """ 102 103 FUNCTIONS: t.Dict[str, t.Callable] = { 104 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 105 "CONCAT": lambda args, dialect: exp.Concat( 106 expressions=args, 107 safe=not dialect.STRICT_STRING_CONCAT, 108 coalesce=dialect.CONCAT_COALESCE, 109 ), 110 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 111 expressions=args, 112 safe=not dialect.STRICT_STRING_CONCAT, 113 coalesce=dialect.CONCAT_COALESCE, 114 ), 115 "DATE_TO_DATE_STR": lambda args: exp.Cast( 116 this=seq_get(args, 0), 117 to=exp.DataType(this=exp.DataType.Type.TEXT), 118 ), 119 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 120 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 121 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 122 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 123 "LIKE": build_like, 124 "LOG": build_logarithm, 125 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 126 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 127 "MOD": lambda args: exp.Mod(this=seq_get(args, 0), expression=seq_get(args, 1)), 128 "TIME_TO_TIME_STR": lambda args: exp.Cast( 129 this=seq_get(args, 0), 130 to=exp.DataType(this=exp.DataType.Type.TEXT), 131 ), 132 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 133 this=exp.Cast( 134 this=seq_get(args, 0), 135 to=exp.DataType(this=exp.DataType.Type.TEXT), 136 ), 137 start=exp.Literal.number(1), 138 length=exp.Literal.number(10), 139 ), 140 "VAR_MAP": build_var_map, 141 } 142 143 NO_PAREN_FUNCTIONS = { 144 TokenType.CURRENT_DATE: exp.CurrentDate, 145 TokenType.CURRENT_DATETIME: exp.CurrentDate, 146 TokenType.CURRENT_TIME: exp.CurrentTime, 147 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 148 TokenType.CURRENT_USER: exp.CurrentUser, 149 } 150 151 STRUCT_TYPE_TOKENS = { 152 TokenType.NESTED, 153 TokenType.OBJECT, 154 TokenType.STRUCT, 155 } 156 157 NESTED_TYPE_TOKENS = { 158 TokenType.ARRAY, 159 TokenType.LOWCARDINALITY, 160 TokenType.MAP, 161 TokenType.NULLABLE, 162 *STRUCT_TYPE_TOKENS, 163 } 164 165 ENUM_TYPE_TOKENS = { 166 TokenType.ENUM, 167 TokenType.ENUM8, 168 TokenType.ENUM16, 169 } 170 171 AGGREGATE_TYPE_TOKENS = { 172 TokenType.AGGREGATEFUNCTION, 173 TokenType.SIMPLEAGGREGATEFUNCTION, 174 } 175 176 TYPE_TOKENS = { 177 TokenType.BIT, 178 TokenType.BOOLEAN, 179 TokenType.TINYINT, 180 TokenType.UTINYINT, 181 TokenType.SMALLINT, 182 TokenType.USMALLINT, 183 TokenType.INT, 184 TokenType.UINT, 185 TokenType.BIGINT, 186 TokenType.UBIGINT, 187 TokenType.INT128, 188 TokenType.UINT128, 189 TokenType.INT256, 190 TokenType.UINT256, 191 TokenType.MEDIUMINT, 192 TokenType.UMEDIUMINT, 193 TokenType.FIXEDSTRING, 194 TokenType.FLOAT, 195 TokenType.DOUBLE, 196 TokenType.CHAR, 197 TokenType.NCHAR, 198 TokenType.VARCHAR, 199 TokenType.NVARCHAR, 200 TokenType.BPCHAR, 201 TokenType.TEXT, 202 TokenType.MEDIUMTEXT, 203 TokenType.LONGTEXT, 
204 TokenType.MEDIUMBLOB, 205 TokenType.LONGBLOB, 206 TokenType.BINARY, 207 TokenType.VARBINARY, 208 TokenType.JSON, 209 TokenType.JSONB, 210 TokenType.INTERVAL, 211 TokenType.TINYBLOB, 212 TokenType.TINYTEXT, 213 TokenType.TIME, 214 TokenType.TIMETZ, 215 TokenType.TIMESTAMP, 216 TokenType.TIMESTAMP_S, 217 TokenType.TIMESTAMP_MS, 218 TokenType.TIMESTAMP_NS, 219 TokenType.TIMESTAMPTZ, 220 TokenType.TIMESTAMPLTZ, 221 TokenType.DATETIME, 222 TokenType.DATETIME64, 223 TokenType.DATE, 224 TokenType.DATE32, 225 TokenType.INT4RANGE, 226 TokenType.INT4MULTIRANGE, 227 TokenType.INT8RANGE, 228 TokenType.INT8MULTIRANGE, 229 TokenType.NUMRANGE, 230 TokenType.NUMMULTIRANGE, 231 TokenType.TSRANGE, 232 TokenType.TSMULTIRANGE, 233 TokenType.TSTZRANGE, 234 TokenType.TSTZMULTIRANGE, 235 TokenType.DATERANGE, 236 TokenType.DATEMULTIRANGE, 237 TokenType.DECIMAL, 238 TokenType.UDECIMAL, 239 TokenType.BIGDECIMAL, 240 TokenType.UUID, 241 TokenType.GEOGRAPHY, 242 TokenType.GEOMETRY, 243 TokenType.HLLSKETCH, 244 TokenType.HSTORE, 245 TokenType.PSEUDO_TYPE, 246 TokenType.SUPER, 247 TokenType.SERIAL, 248 TokenType.SMALLSERIAL, 249 TokenType.BIGSERIAL, 250 TokenType.XML, 251 TokenType.YEAR, 252 TokenType.UNIQUEIDENTIFIER, 253 TokenType.USERDEFINED, 254 TokenType.MONEY, 255 TokenType.SMALLMONEY, 256 TokenType.ROWVERSION, 257 TokenType.IMAGE, 258 TokenType.VARIANT, 259 TokenType.OBJECT, 260 TokenType.OBJECT_IDENTIFIER, 261 TokenType.INET, 262 TokenType.IPADDRESS, 263 TokenType.IPPREFIX, 264 TokenType.IPV4, 265 TokenType.IPV6, 266 TokenType.UNKNOWN, 267 TokenType.NULL, 268 TokenType.NAME, 269 *ENUM_TYPE_TOKENS, 270 *NESTED_TYPE_TOKENS, 271 *AGGREGATE_TYPE_TOKENS, 272 } 273 274 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 275 TokenType.BIGINT: TokenType.UBIGINT, 276 TokenType.INT: TokenType.UINT, 277 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 278 TokenType.SMALLINT: TokenType.USMALLINT, 279 TokenType.TINYINT: TokenType.UTINYINT, 280 TokenType.DECIMAL: TokenType.UDECIMAL, 281 } 282 283 SUBQUERY_PREDICATES = { 284 TokenType.ANY: exp.Any, 285 TokenType.ALL: exp.All, 286 TokenType.EXISTS: exp.Exists, 287 TokenType.SOME: exp.Any, 288 } 289 290 RESERVED_TOKENS = { 291 *Tokenizer.SINGLE_TOKENS.values(), 292 TokenType.SELECT, 293 } 294 295 DB_CREATABLES = { 296 TokenType.DATABASE, 297 TokenType.SCHEMA, 298 TokenType.TABLE, 299 TokenType.VIEW, 300 TokenType.MODEL, 301 TokenType.DICTIONARY, 302 TokenType.SEQUENCE, 303 TokenType.STORAGE_INTEGRATION, 304 } 305 306 CREATABLES = { 307 TokenType.COLUMN, 308 TokenType.CONSTRAINT, 309 TokenType.FUNCTION, 310 TokenType.INDEX, 311 TokenType.PROCEDURE, 312 TokenType.FOREIGN_KEY, 313 *DB_CREATABLES, 314 } 315 316 # Tokens that can represent identifiers 317 ID_VAR_TOKENS = { 318 TokenType.VAR, 319 TokenType.ANTI, 320 TokenType.APPLY, 321 TokenType.ASC, 322 TokenType.ASOF, 323 TokenType.AUTO_INCREMENT, 324 TokenType.BEGIN, 325 TokenType.BPCHAR, 326 TokenType.CACHE, 327 TokenType.CASE, 328 TokenType.COLLATE, 329 TokenType.COMMAND, 330 TokenType.COMMENT, 331 TokenType.COMMIT, 332 TokenType.CONSTRAINT, 333 TokenType.DEFAULT, 334 TokenType.DELETE, 335 TokenType.DESC, 336 TokenType.DESCRIBE, 337 TokenType.DICTIONARY, 338 TokenType.DIV, 339 TokenType.END, 340 TokenType.EXECUTE, 341 TokenType.ESCAPE, 342 TokenType.FALSE, 343 TokenType.FIRST, 344 TokenType.FILTER, 345 TokenType.FINAL, 346 TokenType.FORMAT, 347 TokenType.FULL, 348 TokenType.IDENTIFIER, 349 TokenType.IS, 350 TokenType.ISNULL, 351 TokenType.INTERVAL, 352 TokenType.KEEP, 353 TokenType.KILL, 354 TokenType.LEFT, 355 TokenType.LOAD, 356 TokenType.MERGE, 
357 TokenType.NATURAL, 358 TokenType.NEXT, 359 TokenType.OFFSET, 360 TokenType.OPERATOR, 361 TokenType.ORDINALITY, 362 TokenType.OVERLAPS, 363 TokenType.OVERWRITE, 364 TokenType.PARTITION, 365 TokenType.PERCENT, 366 TokenType.PIVOT, 367 TokenType.PRAGMA, 368 TokenType.RANGE, 369 TokenType.RECURSIVE, 370 TokenType.REFERENCES, 371 TokenType.REFRESH, 372 TokenType.REPLACE, 373 TokenType.RIGHT, 374 TokenType.ROW, 375 TokenType.ROWS, 376 TokenType.SEMI, 377 TokenType.SET, 378 TokenType.SETTINGS, 379 TokenType.SHOW, 380 TokenType.TEMPORARY, 381 TokenType.TOP, 382 TokenType.TRUE, 383 TokenType.TRUNCATE, 384 TokenType.UNIQUE, 385 TokenType.UNPIVOT, 386 TokenType.UPDATE, 387 TokenType.USE, 388 TokenType.VOLATILE, 389 TokenType.WINDOW, 390 *CREATABLES, 391 *SUBQUERY_PREDICATES, 392 *TYPE_TOKENS, 393 *NO_PAREN_FUNCTIONS, 394 } 395 396 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 397 398 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 399 TokenType.ANTI, 400 TokenType.APPLY, 401 TokenType.ASOF, 402 TokenType.FULL, 403 TokenType.LEFT, 404 TokenType.LOCK, 405 TokenType.NATURAL, 406 TokenType.OFFSET, 407 TokenType.RIGHT, 408 TokenType.SEMI, 409 TokenType.WINDOW, 410 } 411 412 ALIAS_TOKENS = ID_VAR_TOKENS 413 414 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 415 416 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 417 418 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 419 420 FUNC_TOKENS = { 421 TokenType.COLLATE, 422 TokenType.COMMAND, 423 TokenType.CURRENT_DATE, 424 TokenType.CURRENT_DATETIME, 425 TokenType.CURRENT_TIMESTAMP, 426 TokenType.CURRENT_TIME, 427 TokenType.CURRENT_USER, 428 TokenType.FILTER, 429 TokenType.FIRST, 430 TokenType.FORMAT, 431 TokenType.GLOB, 432 TokenType.IDENTIFIER, 433 TokenType.INDEX, 434 TokenType.ISNULL, 435 TokenType.ILIKE, 436 TokenType.INSERT, 437 TokenType.LIKE, 438 TokenType.MERGE, 439 TokenType.OFFSET, 440 TokenType.PRIMARY_KEY, 441 TokenType.RANGE, 442 TokenType.REPLACE, 443 TokenType.RLIKE, 444 TokenType.ROW, 445 TokenType.UNNEST, 446 TokenType.VAR, 447 TokenType.LEFT, 448 TokenType.RIGHT, 449 TokenType.SEQUENCE, 450 TokenType.DATE, 451 TokenType.DATETIME, 452 TokenType.TABLE, 453 TokenType.TIMESTAMP, 454 TokenType.TIMESTAMPTZ, 455 TokenType.TRUNCATE, 456 TokenType.WINDOW, 457 TokenType.XOR, 458 *TYPE_TOKENS, 459 *SUBQUERY_PREDICATES, 460 } 461 462 CONJUNCTION = { 463 TokenType.AND: exp.And, 464 TokenType.OR: exp.Or, 465 } 466 467 EQUALITY = { 468 TokenType.COLON_EQ: exp.PropertyEQ, 469 TokenType.EQ: exp.EQ, 470 TokenType.NEQ: exp.NEQ, 471 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 472 } 473 474 COMPARISON = { 475 TokenType.GT: exp.GT, 476 TokenType.GTE: exp.GTE, 477 TokenType.LT: exp.LT, 478 TokenType.LTE: exp.LTE, 479 } 480 481 BITWISE = { 482 TokenType.AMP: exp.BitwiseAnd, 483 TokenType.CARET: exp.BitwiseXor, 484 TokenType.PIPE: exp.BitwiseOr, 485 } 486 487 TERM = { 488 TokenType.DASH: exp.Sub, 489 TokenType.PLUS: exp.Add, 490 TokenType.MOD: exp.Mod, 491 TokenType.COLLATE: exp.Collate, 492 } 493 494 FACTOR = { 495 TokenType.DIV: exp.IntDiv, 496 TokenType.LR_ARROW: exp.Distance, 497 TokenType.SLASH: exp.Div, 498 TokenType.STAR: exp.Mul, 499 } 500 501 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 502 503 TIMES = { 504 TokenType.TIME, 505 TokenType.TIMETZ, 506 } 507 508 TIMESTAMPS = { 509 TokenType.TIMESTAMP, 510 TokenType.TIMESTAMPTZ, 511 TokenType.TIMESTAMPLTZ, 512 *TIMES, 513 } 514 515 SET_OPERATIONS = { 516 TokenType.UNION, 517 TokenType.INTERSECT, 518 TokenType.EXCEPT, 519 } 520 521 JOIN_METHODS = { 522 TokenType.ASOF, 523 
TokenType.NATURAL, 524 TokenType.POSITIONAL, 525 } 526 527 JOIN_SIDES = { 528 TokenType.LEFT, 529 TokenType.RIGHT, 530 TokenType.FULL, 531 } 532 533 JOIN_KINDS = { 534 TokenType.INNER, 535 TokenType.OUTER, 536 TokenType.CROSS, 537 TokenType.SEMI, 538 TokenType.ANTI, 539 } 540 541 JOIN_HINTS: t.Set[str] = set() 542 543 LAMBDAS = { 544 TokenType.ARROW: lambda self, expressions: self.expression( 545 exp.Lambda, 546 this=self._replace_lambda( 547 self._parse_conjunction(), 548 {node.name for node in expressions}, 549 ), 550 expressions=expressions, 551 ), 552 TokenType.FARROW: lambda self, expressions: self.expression( 553 exp.Kwarg, 554 this=exp.var(expressions[0].name), 555 expression=self._parse_conjunction(), 556 ), 557 } 558 559 COLUMN_OPERATORS = { 560 TokenType.DOT: None, 561 TokenType.DCOLON: lambda self, this, to: self.expression( 562 exp.Cast if self.STRICT_CAST else exp.TryCast, 563 this=this, 564 to=to, 565 ), 566 TokenType.ARROW: lambda self, this, path: self.expression( 567 exp.JSONExtract, 568 this=this, 569 expression=self.dialect.to_json_path(path), 570 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 571 ), 572 TokenType.DARROW: lambda self, this, path: self.expression( 573 exp.JSONExtractScalar, 574 this=this, 575 expression=self.dialect.to_json_path(path), 576 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 577 ), 578 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 579 exp.JSONBExtract, 580 this=this, 581 expression=path, 582 ), 583 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 584 exp.JSONBExtractScalar, 585 this=this, 586 expression=path, 587 ), 588 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 589 exp.JSONBContains, 590 this=this, 591 expression=key, 592 ), 593 } 594 595 EXPRESSION_PARSERS = { 596 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 597 exp.Column: lambda self: self._parse_column(), 598 exp.Condition: lambda self: self._parse_conjunction(), 599 exp.DataType: lambda self: self._parse_types(allow_identifiers=False), 600 exp.Expression: lambda self: self._parse_expression(), 601 exp.From: lambda self: self._parse_from(), 602 exp.Group: lambda self: self._parse_group(), 603 exp.Having: lambda self: self._parse_having(), 604 exp.Identifier: lambda self: self._parse_id_var(), 605 exp.Join: lambda self: self._parse_join(), 606 exp.Lambda: lambda self: self._parse_lambda(), 607 exp.Lateral: lambda self: self._parse_lateral(), 608 exp.Limit: lambda self: self._parse_limit(), 609 exp.Offset: lambda self: self._parse_offset(), 610 exp.Order: lambda self: self._parse_order(), 611 exp.Ordered: lambda self: self._parse_ordered(), 612 exp.Properties: lambda self: self._parse_properties(), 613 exp.Qualify: lambda self: self._parse_qualify(), 614 exp.Returning: lambda self: self._parse_returning(), 615 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 616 exp.Table: lambda self: self._parse_table_parts(), 617 exp.TableAlias: lambda self: self._parse_table_alias(), 618 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 619 exp.Where: lambda self: self._parse_where(), 620 exp.Window: lambda self: self._parse_named_window(), 621 exp.With: lambda self: self._parse_with(), 622 "JOIN_TYPE": lambda self: self._parse_join_parts(), 623 } 624 625 STATEMENT_PARSERS = { 626 TokenType.ALTER: lambda self: self._parse_alter(), 627 TokenType.BEGIN: lambda self: self._parse_transaction(), 628 TokenType.CACHE: lambda self: self._parse_cache(), 629 TokenType.COMMENT: lambda 
self: self._parse_comment(), 630 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 631 TokenType.CREATE: lambda self: self._parse_create(), 632 TokenType.DELETE: lambda self: self._parse_delete(), 633 TokenType.DESC: lambda self: self._parse_describe(), 634 TokenType.DESCRIBE: lambda self: self._parse_describe(), 635 TokenType.DROP: lambda self: self._parse_drop(), 636 TokenType.INSERT: lambda self: self._parse_insert(), 637 TokenType.KILL: lambda self: self._parse_kill(), 638 TokenType.LOAD: lambda self: self._parse_load(), 639 TokenType.MERGE: lambda self: self._parse_merge(), 640 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 641 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 642 TokenType.REFRESH: lambda self: self._parse_refresh(), 643 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 644 TokenType.SET: lambda self: self._parse_set(), 645 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 646 TokenType.UNCACHE: lambda self: self._parse_uncache(), 647 TokenType.UPDATE: lambda self: self._parse_update(), 648 TokenType.USE: lambda self: self.expression( 649 exp.Use, 650 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 651 this=self._parse_table(schema=False), 652 ), 653 } 654 655 UNARY_PARSERS = { 656 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 657 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 658 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 659 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 660 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 661 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 662 } 663 664 STRING_PARSERS = { 665 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 666 exp.RawString, this=token.text 667 ), 668 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 669 exp.National, this=token.text 670 ), 671 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 672 TokenType.STRING: lambda self, token: self.expression( 673 exp.Literal, this=token.text, is_string=True 674 ), 675 TokenType.UNICODE_STRING: lambda self, token: self.expression( 676 exp.UnicodeString, 677 this=token.text, 678 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 679 ), 680 } 681 682 NUMERIC_PARSERS = { 683 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 684 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 685 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 686 TokenType.NUMBER: lambda self, token: self.expression( 687 exp.Literal, this=token.text, is_string=False 688 ), 689 } 690 691 PRIMARY_PARSERS = { 692 **STRING_PARSERS, 693 **NUMERIC_PARSERS, 694 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 695 TokenType.NULL: lambda self, _: self.expression(exp.Null), 696 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 697 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 698 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 699 TokenType.STAR: lambda self, _: self.expression( 700 exp.Star, **{"except": self._parse_except(), "replace": 
self._parse_replace()} 701 ), 702 } 703 704 PLACEHOLDER_PARSERS = { 705 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 706 TokenType.PARAMETER: lambda self: self._parse_parameter(), 707 TokenType.COLON: lambda self: ( 708 self.expression(exp.Placeholder, this=self._prev.text) 709 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 710 else None 711 ), 712 } 713 714 RANGE_PARSERS = { 715 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 716 TokenType.GLOB: binary_range_parser(exp.Glob), 717 TokenType.ILIKE: binary_range_parser(exp.ILike), 718 TokenType.IN: lambda self, this: self._parse_in(this), 719 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 720 TokenType.IS: lambda self, this: self._parse_is(this), 721 TokenType.LIKE: binary_range_parser(exp.Like), 722 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 723 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 724 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 725 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 726 } 727 728 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 729 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 730 "AUTO": lambda self: self._parse_auto_property(), 731 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 732 "BACKUP": lambda self: self.expression( 733 exp.BackupProperty, this=self._parse_var(any_token=True) 734 ), 735 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 736 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 737 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 738 "CHECKSUM": lambda self: self._parse_checksum(), 739 "CLUSTER BY": lambda self: self._parse_cluster(), 740 "CLUSTERED": lambda self: self._parse_clustered_by(), 741 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 742 exp.CollateProperty, **kwargs 743 ), 744 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 745 "CONTAINS": lambda self: self._parse_contains_property(), 746 "COPY": lambda self: self._parse_copy_property(), 747 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 748 "DEFINER": lambda self: self._parse_definer(), 749 "DETERMINISTIC": lambda self: self.expression( 750 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 751 ), 752 "DISTKEY": lambda self: self._parse_distkey(), 753 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 754 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 755 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 756 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 757 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 758 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 759 "FREESPACE": lambda self: self._parse_freespace(), 760 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 761 "HEAP": lambda self: self.expression(exp.HeapProperty), 762 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 763 "IMMUTABLE": lambda self: self.expression( 764 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 765 ), 766 "INHERITS": lambda self: self.expression( 767 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 768 ), 769 "INPUT": lambda self: self.expression(exp.InputModelProperty, 
this=self._parse_schema()), 770 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 771 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 772 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 773 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 774 "LIKE": lambda self: self._parse_create_like(), 775 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 776 "LOCK": lambda self: self._parse_locking(), 777 "LOCKING": lambda self: self._parse_locking(), 778 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 779 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 780 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 781 "MODIFIES": lambda self: self._parse_modifies_property(), 782 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 783 "NO": lambda self: self._parse_no_property(), 784 "ON": lambda self: self._parse_on_property(), 785 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 786 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 787 "PARTITION": lambda self: self._parse_partitioned_of(), 788 "PARTITION BY": lambda self: self._parse_partitioned_by(), 789 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 790 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 791 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 792 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 793 "READS": lambda self: self._parse_reads_property(), 794 "REMOTE": lambda self: self._parse_remote_with_connection(), 795 "RETURNS": lambda self: self._parse_returns(), 796 "ROW": lambda self: self._parse_row(), 797 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 798 "SAMPLE": lambda self: self.expression( 799 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 800 ), 801 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 802 "SETTINGS": lambda self: self.expression( 803 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 804 ), 805 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 806 "SORTKEY": lambda self: self._parse_sortkey(), 807 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 808 "STABLE": lambda self: self.expression( 809 exp.StabilityProperty, this=exp.Literal.string("STABLE") 810 ), 811 "STORED": lambda self: self._parse_stored(), 812 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 813 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 814 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 815 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 816 "TO": lambda self: self._parse_to_table(), 817 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 818 "TRANSFORM": lambda self: self.expression( 819 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 820 ), 821 "TTL": lambda self: self._parse_ttl(), 822 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 823 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 824 "VOLATILE": lambda self: self._parse_volatile_property(), 825 "WITH": lambda self: self._parse_with_property(), 826 } 827 828 CONSTRAINT_PARSERS = { 829 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 830 "AUTO_INCREMENT": lambda 

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_conjunction),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }

    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. SELECT COUNT(*) 'count'
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
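
    # A minimal end-to-end sketch of the two public entry points, using the
    # Tokenizer imported at the top of this module (default dialect assumed;
    # exp.Condition is assumed to be registered in EXPRESSION_PARSERS):
    #
    #     >>> tokens = Tokenizer().tokenize("SELECT 1; SELECT 2")
    #     >>> Parser().parse(tokens)  # -> one syntax tree per statement
    #     >>> tokens = Tokenizer().tokenize("x = 1")
    #     >>> Parser().parse_into(exp.Condition, tokens)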

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )
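
    # The error level chosen at construction time controls what raise_error and
    # check_errors do with recorded errors. A minimal sketch:
    #
    #     >>> parser = Parser(error_level=ErrorLevel.RAISE)   # collect errors, then raise
    #     ...                                                 # up to max_errors messages
    #     >>> parser = Parser(error_level=ErrorLevel.IGNORE)  # never raise; trees may be
    #     ...                                                 # missing required args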

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an
        error. This behavior can be different depending on the user-set ErrorLevel, so _try_parse
        aims to solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
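
    # Doctest-style sketch of _parse_statement's dispatch: statement tokens go
    # through STATEMENT_PARSERS, while tokens in Tokenizer.COMMANDS fall back to
    # an opaque exp.Command node (default dialect assumed):
    #
    #     >>> import sqlglot
    #     >>> type(sqlglot.parse_one("SHOW TABLES")).__name__
    #     'Command'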

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
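
    # Doctest-style sketch of the DROP path above:
    #
    #     >>> import sqlglot
    #     >>> drop = sqlglot.parse_one("DROP TABLE IF EXISTS a.b CASCADE")
    #     >>> drop.args["exists"], drop.args["kind"]
    #     (True, 'TABLE')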

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        return None if self._index == index else seq
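
    # Doctest-style sketch of the CREATE path above:
    #
    #     >>> import sqlglot
    #     >>> create = sqlglot.parse_one("CREATE TABLE a (b INT)")
    #     >>> create.args["kind"], type(create.this).__name__
    #     ('TABLE', 'Schema')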

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return self._parse_sequence_properties()

        return self.expression(
            exp.Property,
            this=key.to_dot() if isinstance(key, exp.Column) else key,
            value=self._parse_bitwise() or self._parse_var(any_token=True),
        )

    def _parse_stored(self) -> exp.FileFormatProperty:
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat, input_format=input_format, output_format=output_format
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )
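
    # Doctest-style sketch: each property parsed by _parse_property lands under
    # the Create node's "properties" arg:
    #
    #     >>> import sqlglot
    #     >>> ddl = sqlglot.parse_one("CREATE TABLE a (b INT) LOCATION 's3://bucket/x'")
    #     >>> ddl.args["properties"].expressions  # -> [exp.LocationProperty(...)]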

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty:
        self._match_pair(TokenType.EQ, TokenType.ON)

        prop = self.expression(exp.WithSystemVersioningProperty)
        if self._match(TokenType.L_PAREN):
            self._match_text_seq("HISTORY_TABLE", "=")
            prop.set("this", self._parse_table_parts())

            if self._match(TokenType.COMMA):
                self._match_text_seq("DATA_CONSISTENCY_CHECK", "=")
                prop.set("expression", self._advance_any() and self._prev.text.upper())

            self._match_r_paren()

        return prop

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None
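
    # Doctest-style sketch of the Postgres partition-bound grammar handled above
    # (read="postgres" picks the Postgres dialect):
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one(
    #     ...     "CREATE TABLE p PARTITION OF t FOR VALUES FROM (1) TO (10)",
    #     ...     read="postgres",
    #     ... )
    #     ... # an exp.Create whose properties include exp.PartitionedOfProperty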

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
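
    # Doctest-style sketch of the CREATE ... LIKE path handled by
    # _parse_create_like above:
    #
    #     >>> import sqlglot
    #     >>> ddl = sqlglot.parse_one("CREATE TABLE a LIKE b")
    #     >>> ddl.args["properties"]  # -> exp.Properties containing exp.LikeProperty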

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper()
        if not self._match_set(self.ID_VAR_TOKENS, advance=False):
            style = None
            self._retreat(self._index - 1)
        this = self._parse_table(schema=True)
        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        return self.expression(
            exp.Describe, this=this, style=style, kind=kind, expressions=expressions
        )

    def _parse_insert(self) -> exp.Insert:
        comments = ensure_list(self._prev_comments)
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = self._parse_table(schema=True) if not is_function else self._parse_function()

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()
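
    # Doctest-style sketch of the ON CONFLICT path above:
    #
    #     >>> import sqlglot
    #     >>> ins = sqlglot.parse_one("INSERT INTO t VALUES (1) ON CONFLICT DO NOTHING")
    #     >>> ins.args["conflict"]  # -> an exp.OnConflict node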

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_properties()
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )
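
    # Doctest-style sketch of the multiple-table DELETE handled above
    # (read="mysql" picks the MySQL dialect):
    #
    #     >>> import sqlglot
    #     >>> stmt = sqlglot.parse_one("DELETE a FROM a JOIN b ON a.id = b.id", read="mysql")
    #     >>> stmt.args["tables"]  # -> the tables listed before FROM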

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        return self.expression(exp.Tuple, expressions=[self._parse_expression()])

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()
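
    # _parse_select below reads the projections, then _parse_query_modifiers
    # attaches WHERE / GROUP BY / LIMIT etc. via QUERY_MODIFIER_PARSERS. A
    # doctest-style sketch:
    #
    #     >>> import sqlglot
    #     >>> q = sqlglot.parse_one("SELECT a FROM t WHERE b = 1 LIMIT 5")
    #     >>> {k for k, v in q.args.items() if v} >= {"expressions", "from", "where", "limit"}
    #     True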
this = exp.select("*").from_( 2538 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2539 ) 2540 else: 2541 this = ( 2542 self._parse_table() 2543 if table 2544 else self._parse_select(nested=True, parse_set_operation=False) 2545 ) 2546 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2547 2548 self._match_r_paren() 2549 2550 # We return early here so that the UNION isn't attached to the subquery by the 2551 # following call to _parse_set_operations, but instead becomes the parent node 2552 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2553 elif self._match(TokenType.VALUES, advance=False): 2554 this = self._parse_derived_table_values() 2555 elif from_: 2556 this = exp.select("*").from_(from_.this, copy=False) 2557 else: 2558 this = None 2559 2560 if parse_set_operation: 2561 return self._parse_set_operations(this) 2562 return this 2563 2564 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2565 if not skip_with_token and not self._match(TokenType.WITH): 2566 return None 2567 2568 comments = self._prev_comments 2569 recursive = self._match(TokenType.RECURSIVE) 2570 2571 expressions = [] 2572 while True: 2573 expressions.append(self._parse_cte()) 2574 2575 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2576 break 2577 else: 2578 self._match(TokenType.WITH) 2579 2580 return self.expression( 2581 exp.With, comments=comments, expressions=expressions, recursive=recursive 2582 ) 2583 2584 def _parse_cte(self) -> exp.CTE: 2585 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2586 if not alias or not alias.this: 2587 self.raise_error("Expected CTE to have alias") 2588 2589 self._match(TokenType.ALIAS) 2590 2591 if self._match_text_seq("NOT", "MATERIALIZED"): 2592 materialized = False 2593 elif self._match_text_seq("MATERIALIZED"): 2594 materialized = True 2595 else: 2596 materialized = None 2597 2598 return self.expression( 2599 exp.CTE, 2600 this=self._parse_wrapped(self._parse_statement), 2601 alias=alias, 2602 materialized=materialized, 2603 ) 2604 2605 def _parse_table_alias( 2606 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2607 ) -> t.Optional[exp.TableAlias]: 2608 any_token = self._match(TokenType.ALIAS) 2609 alias = ( 2610 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2611 or self._parse_string_as_identifier() 2612 ) 2613 2614 index = self._index 2615 if self._match(TokenType.L_PAREN): 2616 columns = self._parse_csv(self._parse_function_parameter) 2617 self._match_r_paren() if columns else self._retreat(index) 2618 else: 2619 columns = None 2620 2621 if not alias and not columns: 2622 return None 2623 2624 return self.expression(exp.TableAlias, this=alias, columns=columns) 2625 2626 def _parse_subquery( 2627 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2628 ) -> t.Optional[exp.Subquery]: 2629 if not this: 2630 return None 2631 2632 return self.expression( 2633 exp.Subquery, 2634 this=this, 2635 pivots=self._parse_pivots(), 2636 alias=self._parse_table_alias() if parse_alias else None, 2637 ) 2638 2639 def _implicit_unnests_to_explicit(self, this: E) -> E: 2640 from sqlglot.optimizer.normalize_identifiers import ( 2641 normalize_identifiers as _norm, 2642 ) 2643 2644 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2645 for i, join in enumerate(this.args.get("joins") or []): 2646 table = join.this 2647 normalized_table = table.copy() 2648 normalized_table.meta["maybe_column"] 
    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import (
            normalize_identifiers as _norm,
        )

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for join in this.args.get("joins") or []:
            table = join.this
            normalized_table = table.copy()
            # Hint for identifier normalization: this "table" name may actually be a column
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, (exp.Query, exp.Table)):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)

                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args:
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_function() or self._parse_var(upper=True)
                ),
                [],
            ):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )
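    # Illustrative sketch: MATCH_RECOGNIZE (row pattern matching, as in Snowflake or
    # Oracle) is parsed below into an exp.MatchRecognize node, e.g.
    #
    #   import sqlglot
    #   sql = """
    #       SELECT * FROM t MATCH_RECOGNIZE (
    #           PARTITION BY a ORDER BY b
    #           MEASURES FIRST(c) AS fc
    #           PATTERN (X Y+)
    #           DEFINE X AS c > 0, Y AS c < 0
    #       )
    #   """
    #   sqlglot.parse_one(sql, read="snowflake")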
    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
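    # Illustrative sketch: both LATERAL joins and SQL Server style CROSS/OUTER APPLY
    # funnel into _parse_lateral below, e.g.
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT * FROM t CROSS APPLY (SELECT * FROM u)", read="tsql")
    #
    # yields an exp.Lateral with cross_apply=True inside the surrounding select.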
    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not isinstance(kwargs["this"], exp.Unnest) and not (
            kind and kind.token_type == TokenType.CROSS
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        return self.expression(exp.Join, comments=comments, **kwargs)
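    # Illustrative sketch: _parse_join splits a join into method/side/kind tokens, e.g.
    #
    #   import sqlglot
    #   join = sqlglot.parse_one("SELECT * FROM a LEFT OUTER JOIN b ON a.x = b.x").args["joins"][0]
    #
    # stores "LEFT" as the join's side, "OUTER" as its kind, and the condition in args["on"].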
    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_conjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
        )

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts(("INDEX", "KEY"))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        if is_db_reference:
            catalog = db
            db = table
            table = None
self.raise_error(f"Expected table name but got {self._curr}") 3103 if not db and is_db_reference: 3104 self.raise_error(f"Expected database name but got {self._curr}") 3105 3106 return self.expression( 3107 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 3108 ) 3109 3110 def _parse_table( 3111 self, 3112 schema: bool = False, 3113 joins: bool = False, 3114 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3115 parse_bracket: bool = False, 3116 is_db_reference: bool = False, 3117 ) -> t.Optional[exp.Expression]: 3118 lateral = self._parse_lateral() 3119 if lateral: 3120 return lateral 3121 3122 unnest = self._parse_unnest() 3123 if unnest: 3124 return unnest 3125 3126 values = self._parse_derived_table_values() 3127 if values: 3128 return values 3129 3130 subquery = self._parse_select(table=True) 3131 if subquery: 3132 if not subquery.args.get("pivots"): 3133 subquery.set("pivots", self._parse_pivots()) 3134 return subquery 3135 3136 bracket = parse_bracket and self._parse_bracket(None) 3137 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3138 3139 only = self._match(TokenType.ONLY) 3140 3141 this = t.cast( 3142 exp.Expression, 3143 bracket 3144 or self._parse_bracket( 3145 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3146 ), 3147 ) 3148 3149 if only: 3150 this.set("only", only) 3151 3152 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3153 self._match_text_seq("*") 3154 3155 if schema: 3156 return self._parse_schema(this=this) 3157 3158 version = self._parse_version() 3159 3160 if version: 3161 this.set("version", version) 3162 3163 if self.dialect.ALIAS_POST_TABLESAMPLE: 3164 table_sample = self._parse_table_sample() 3165 3166 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3167 if alias: 3168 this.set("alias", alias) 3169 3170 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3171 return self.expression( 3172 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3173 ) 3174 3175 this.set("hints", self._parse_table_hints()) 3176 3177 if not this.args.get("pivots"): 3178 this.set("pivots", self._parse_pivots()) 3179 3180 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3181 table_sample = self._parse_table_sample() 3182 3183 if table_sample: 3184 table_sample.set("this", this) 3185 this = table_sample 3186 3187 if joins: 3188 for join in self._parse_joins(): 3189 this.append("joins", join) 3190 3191 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3192 this.set("ordinality", True) 3193 this.set("alias", self._parse_table_alias()) 3194 3195 return this 3196 3197 def _parse_version(self) -> t.Optional[exp.Version]: 3198 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3199 this = "TIMESTAMP" 3200 elif self._match(TokenType.VERSION_SNAPSHOT): 3201 this = "VERSION" 3202 else: 3203 return None 3204 3205 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3206 kind = self._prev.text.upper() 3207 start = self._parse_bitwise() 3208 self._match_texts(("TO", "AND")) 3209 end = self._parse_bitwise() 3210 expression: t.Optional[exp.Expression] = self.expression( 3211 exp.Tuple, expressions=[start, end] 3212 ) 3213 elif self._match_text_seq("CONTAINED", "IN"): 3214 kind = "CONTAINED IN" 3215 expression = self.expression( 3216 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3217 ) 3218 elif self._match(TokenType.ALL): 3219 kind = "ALL" 3220 expression = None 3221 else: 3222 
self._match_text_seq("AS", "OF") 3223 kind = "AS OF" 3224 expression = self._parse_type() 3225 3226 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3227 3228 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3229 if not self._match(TokenType.UNNEST): 3230 return None 3231 3232 expressions = self._parse_wrapped_csv(self._parse_equality) 3233 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3234 3235 alias = self._parse_table_alias() if with_alias else None 3236 3237 if alias: 3238 if self.dialect.UNNEST_COLUMN_ONLY: 3239 if alias.args.get("columns"): 3240 self.raise_error("Unexpected extra column alias in unnest.") 3241 3242 alias.set("columns", [alias.this]) 3243 alias.set("this", None) 3244 3245 columns = alias.args.get("columns") or [] 3246 if offset and len(expressions) < len(columns): 3247 offset = columns.pop() 3248 3249 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3250 self._match(TokenType.ALIAS) 3251 offset = self._parse_id_var( 3252 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3253 ) or exp.to_identifier("offset") 3254 3255 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3256 3257 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3258 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3259 if not is_derived and not self._match_text_seq("VALUES"): 3260 return None 3261 3262 expressions = self._parse_csv(self._parse_value) 3263 alias = self._parse_table_alias() 3264 3265 if is_derived: 3266 self._match_r_paren() 3267 3268 return self.expression( 3269 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3270 ) 3271 3272 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3273 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3274 as_modifier and self._match_text_seq("USING", "SAMPLE") 3275 ): 3276 return None 3277 3278 bucket_numerator = None 3279 bucket_denominator = None 3280 bucket_field = None 3281 percent = None 3282 size = None 3283 seed = None 3284 3285 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3286 matched_l_paren = self._match(TokenType.L_PAREN) 3287 3288 if self.TABLESAMPLE_CSV: 3289 num = None 3290 expressions = self._parse_csv(self._parse_primary) 3291 else: 3292 expressions = None 3293 num = ( 3294 self._parse_factor() 3295 if self._match(TokenType.NUMBER, advance=False) 3296 else self._parse_primary() or self._parse_placeholder() 3297 ) 3298 3299 if self._match_text_seq("BUCKET"): 3300 bucket_numerator = self._parse_number() 3301 self._match_text_seq("OUT", "OF") 3302 bucket_denominator = bucket_denominator = self._parse_number() 3303 self._match(TokenType.ON) 3304 bucket_field = self._parse_field() 3305 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3306 percent = num 3307 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3308 size = num 3309 else: 3310 percent = num 3311 3312 if matched_l_paren: 3313 self._match_r_paren() 3314 3315 if self._match(TokenType.L_PAREN): 3316 method = self._parse_var(upper=True) 3317 seed = self._match(TokenType.COMMA) and self._parse_number() 3318 self._match_r_paren() 3319 elif self._match_texts(("SEED", "REPEATABLE")): 3320 seed = self._parse_wrapped(self._parse_number) 3321 3322 return self.expression( 3323 exp.TableSample, 3324 expressions=expressions, 3325 method=method, 3326 bucket_numerator=bucket_numerator, 3327 
    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()
            return self._parse_in(this) if self._match(TokenType.IN) else this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()
        return self.expression(
            exp.Pivot, this=this, expressions=expressions, using=using, group=group
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_conjunction()

            self._match(TokenType.ALIAS)
            alias = self._parse_field()
            if alias:
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        aliased_expressions = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=aliased_expressions)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        field = self._parse_pivot_in()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            field=field,
            unpivot=unpivot,
            include_nulls=include_nulls,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name
                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            index = self._index
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                if with_:
                    self._retreat(index)
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]:
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_conjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
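        # PRIOR is only meaningful inside CONNECT BY, so a parser for it is
        # registered just for the duration of the following expression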
self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3543 exp.Prior, this=self._parse_bitwise() 3544 ) 3545 connect = self._parse_conjunction() 3546 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3547 3548 if not start and self._match(TokenType.START_WITH): 3549 start = self._parse_conjunction() 3550 3551 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3552 3553 def _parse_name_as_expression(self) -> exp.Alias: 3554 return self.expression( 3555 exp.Alias, 3556 alias=self._parse_id_var(any_token=True), 3557 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3558 ) 3559 3560 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3561 if self._match_text_seq("INTERPOLATE"): 3562 return self._parse_wrapped_csv(self._parse_name_as_expression) 3563 return None 3564 3565 def _parse_order( 3566 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3567 ) -> t.Optional[exp.Expression]: 3568 siblings = None 3569 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3570 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3571 return this 3572 3573 siblings = True 3574 3575 return self.expression( 3576 exp.Order, 3577 this=this, 3578 expressions=self._parse_csv(self._parse_ordered), 3579 interpolate=self._parse_interpolate(), 3580 siblings=siblings, 3581 ) 3582 3583 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3584 if not self._match(token): 3585 return None 3586 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3587 3588 def _parse_ordered( 3589 self, parse_method: t.Optional[t.Callable] = None 3590 ) -> t.Optional[exp.Ordered]: 3591 this = parse_method() if parse_method else self._parse_conjunction() 3592 if not this: 3593 return None 3594 3595 asc = self._match(TokenType.ASC) 3596 desc = self._match(TokenType.DESC) or (asc and False) 3597 3598 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3599 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3600 3601 nulls_first = is_nulls_first or False 3602 explicitly_null_ordered = is_nulls_first or is_nulls_last 3603 3604 if ( 3605 not explicitly_null_ordered 3606 and ( 3607 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3608 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3609 ) 3610 and self.dialect.NULL_ORDERING != "nulls_are_last" 3611 ): 3612 nulls_first = True 3613 3614 if self._match_text_seq("WITH", "FILL"): 3615 with_fill = self.expression( 3616 exp.WithFill, 3617 **{ # type: ignore 3618 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3619 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3620 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3621 }, 3622 ) 3623 else: 3624 with_fill = None 3625 3626 return self.expression( 3627 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3628 ) 3629 3630 def _parse_limit( 3631 self, 3632 this: t.Optional[exp.Expression] = None, 3633 top: bool = False, 3634 skip_limit_token: bool = False, 3635 ) -> t.Optional[exp.Expression]: 3636 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3637 comments = self._prev_comments 3638 if top: 3639 limit_paren = self._match(TokenType.L_PAREN) 3640 expression = self._parse_term() if limit_paren else self._parse_number() 3641 3642 if limit_paren: 3643 self._match_r_paren() 3644 else: 3645 expression = self._parse_term() 3646 3647 if self._match(TokenType.COMMA): 
    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this and self._match_set(self.SET_OPERATIONS):
            token_type = self._prev.token_type

            if token_type == TokenType.UNION:
                operation = exp.Union
            elif token_type == TokenType.EXCEPT:
                operation = exp.Except
            else:
                operation = exp.Intersect

            comments = self._prev.comments
            distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL)
            by_name = self._match_text_seq("BY", "NAME")
            expression = self._parse_select(nested=True, parse_set_operation=False)

            this = self.expression(
                operation,
                comments=comments,
                this=this,
                distinct=distinct,
                by_name=by_name,
                expression=expression,
            )

        if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION:
            expression = this.expression

            if expression:
                for arg in self.UNION_MODIFIERS:
                    expr = expression.args.get(arg)

                    if expr:
                        this.set(arg, expr.pop())

        return this
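    # Illustrative sketch: MySQL's "LIMIT <offset>, <count>" is captured by the COMMA
    # branch in _parse_limit, e.g.
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT x FROM t LIMIT 5, 10", read="mysql")
    #
    # ends up with an exp.Offset of 5 on the select and 10 as the Limit expression
    # (the offset is hoisted out of the Limit by _parse_query_modifiers).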
    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)

            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())
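    # Illustrative sketch: range predicates compose here, e.g.
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT * FROM t WHERE x LIKE '5!%' ESCAPE '!'")
    #
    # wraps the exp.Like in an exp.Escape via _parse_escape.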
    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # This is not actually a unit, it's something else (e.g. a "window side")
                    unit = None
                    self._retreat(self._index - 1)

                this = exp.Literal.string(parts[0])
                unit = self.expression(exp.Var, this=parts[1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term())
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            this = self.expression(
                self.FACTOR[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )
            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this
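    # Illustrative sketch: _parse_interval's canonicalization means both of these
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT INTERVAL '5 DAY'")
    #   sqlglot.parse_one("SELECT INTERVAL 5 DAY")
    #
    # normalize to an exp.Interval with string literal '5' and unit Var DAY.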
    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)

        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()

        if interval:
            # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
            while True:
                index = self._index
                self._match(TokenType.PLUS)

                if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
                    self._retreat(index)
                    break

                interval = self.expression(  # type: ignore
                    exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
                )

            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            if not data_type.expressions:
                self._retreat(index)
                return self._parse_column()

            return self._parse_column_ops(data_type)

        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if identifier:
                tokens = self.dialect.tokenize(identifier.name)

                if len(tokens) != 1:
                    self.raise_error("Unexpected identifier", self._prev)

                if tokens[0].token_type in self.TYPE_TOKENS:
                    self._prev = tokens[0]
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    type_name = identifier.name

                    while self._match(TokenType.DOT):
                        type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

                    return exp.DataType.build(type_name, udt=True)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR,)
                )
                if not func_or_ident or not self._match(TokenType.COMMA):
                    return None

                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                expressions.insert(0, func_or_ident)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        this: t.Optional[exp.Expression] = None
        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                values=values,
                prefix=prefix,
            )

        while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        index = self._index
        this = self._parse_type(parse_interval=False) or self._parse_id_var()
        self._match(TokenType.COLON)
        column_def = self._parse_column_def(this)
        if type_required and (
            (isinstance(this, exp.Column) and this.this is column_def) or this is column_def
        ):
            self._retreat(index)
            return self._parse_types()

        return column_def

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this

        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        return self._parse_column_ops(this) if this else self._parse_bracket(this)

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        return self.expression(exp.Column, this=this) if isinstance(this, exp.Identifier) else this

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference()
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func) and this:
                # bigquery allows function calls like x.y.count(...), SAFE.SUBSTR(...) etc
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = exp.replace_tree(
                    this,
                    lambda n: (
                        self.expression(exp.Dot, this=n.args.get("table"), expression=n.this)
                        if n.table
                        else n.this
                    )
                    if isinstance(n, exp.Column)
                    else n,
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            this = self._parse_bracket(this)

        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.UNWRAPPED_QUERIES):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif isinstance(this, exp.Subquery):
                this = self._parse_subquery(
                    this=self._parse_set_operations(this), parse_alias=False
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=this)

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions, anonymous=anonymous, optional_parens=optional_parens
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if alias:
                args = self._kv_to_prop_eq(args)

            if function and not anonymous:
                if "dialect" in function.__code__.co_varnames:
                    func = function(args, dialect=self.dialect)
                else:
                    func = function(args)

                func = self.validate_expression(func, args)
                if not self.dialect.NORMALIZE_FUNCTIONS:
                    func.meta["name"] = this

                this = func
else: 4399 if token_type == TokenType.IDENTIFIER: 4400 this = exp.Identifier(this=this, quoted=True) 4401 this = self.expression(exp.Anonymous, this=this, expressions=args) 4402 4403 if isinstance(this, exp.Expression): 4404 this.add_comments(comments) 4405 4406 self._match_r_paren(this) 4407 return self._parse_window(this) 4408 4409 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4410 transformed = [] 4411 4412 for e in expressions: 4413 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4414 if isinstance(e, exp.Alias): 4415 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4416 4417 if not isinstance(e, exp.PropertyEQ): 4418 e = self.expression( 4419 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4420 ) 4421 4422 if isinstance(e.this, exp.Column): 4423 e.this.replace(e.this.this) 4424 4425 transformed.append(e) 4426 4427 return transformed 4428 4429 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4430 return self._parse_column_def(self._parse_id_var()) 4431 4432 def _parse_user_defined_function( 4433 self, kind: t.Optional[TokenType] = None 4434 ) -> t.Optional[exp.Expression]: 4435 this = self._parse_id_var() 4436 4437 while self._match(TokenType.DOT): 4438 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4439 4440 if not self._match(TokenType.L_PAREN): 4441 return this 4442 4443 expressions = self._parse_csv(self._parse_function_parameter) 4444 self._match_r_paren() 4445 return self.expression( 4446 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4447 ) 4448 4449 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4450 literal = self._parse_primary() 4451 if literal: 4452 return self.expression(exp.Introducer, this=token.text, expression=literal) 4453 4454 return self.expression(exp.Identifier, this=token.text) 4455 4456 def _parse_session_parameter(self) -> exp.SessionParameter: 4457 kind = None 4458 this = self._parse_id_var() or self._parse_primary() 4459 4460 if this and self._match(TokenType.DOT): 4461 kind = this.name 4462 this = self._parse_var() or self._parse_primary() 4463 4464 return self.expression(exp.SessionParameter, this=this, kind=kind) 4465 4466 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4467 index = self._index 4468 4469 if self._match(TokenType.L_PAREN): 4470 expressions = t.cast( 4471 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4472 ) 4473 4474 if not self._match(TokenType.R_PAREN): 4475 self._retreat(index) 4476 else: 4477 expressions = [self._parse_id_var()] 4478 4479 if self._match_set(self.LAMBDAS): 4480 return self.LAMBDAS[self._prev.token_type](self, expressions) 4481 4482 self._retreat(index) 4483 4484 this: t.Optional[exp.Expression] 4485 4486 if self._match(TokenType.DISTINCT): 4487 this = self.expression( 4488 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4489 ) 4490 else: 4491 this = self._parse_select_or_expression(alias=alias) 4492 4493 return self._parse_limit( 4494 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4495 ) 4496 4497 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4498 index = self._index 4499 4500 if not self._match(TokenType.L_PAREN): 4501 return this 4502 4503 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 4504 # expr can be of both types 4505 if self._match_set(self.SELECT_START_TOKENS): 4506 self._retreat(index) 4507 return this 4508 4509 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4510 4511 self._match_r_paren() 4512 return self.expression(exp.Schema, this=this, expressions=args) 4513 4514 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4515 return self._parse_column_def(self._parse_field(any_token=True)) 4516 4517 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4518 # column defs are not really columns, they're identifiers 4519 if isinstance(this, exp.Column): 4520 this = this.this 4521 4522 kind = self._parse_types(schema=True) 4523 4524 if self._match_text_seq("FOR", "ORDINALITY"): 4525 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4526 4527 constraints: t.List[exp.Expression] = [] 4528 4529 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4530 ("ALIAS", "MATERIALIZED") 4531 ): 4532 persisted = self._prev.text.upper() == "MATERIALIZED" 4533 constraints.append( 4534 self.expression( 4535 exp.ComputedColumnConstraint, 4536 this=self._parse_conjunction(), 4537 persisted=persisted or self._match_text_seq("PERSISTED"), 4538 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4539 ) 4540 ) 4541 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4542 self._match(TokenType.ALIAS) 4543 constraints.append( 4544 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4545 ) 4546 4547 while True: 4548 constraint = self._parse_column_constraint() 4549 if not constraint: 4550 break 4551 constraints.append(constraint) 4552 4553 if not kind and not constraints: 4554 return this 4555 4556 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4557 4558 def _parse_auto_increment( 4559 self, 4560 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4561 start = None 4562 increment = None 4563 4564 if self._match(TokenType.L_PAREN, advance=False): 4565 args = self._parse_wrapped_csv(self._parse_bitwise) 4566 start = seq_get(args, 0) 4567 increment = seq_get(args, 1) 4568 elif self._match_text_seq("START"): 4569 start = self._parse_bitwise() 4570 self._match_text_seq("INCREMENT") 4571 increment = self._parse_bitwise() 4572 4573 if start and increment: 4574 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4575 4576 return exp.AutoIncrementColumnConstraint() 4577 4578 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4579 if not self._match_text_seq("REFRESH"): 4580 self._retreat(self._index - 1) 4581 return None 4582 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4583 4584 def _parse_compress(self) -> exp.CompressColumnConstraint: 4585 if self._match(TokenType.L_PAREN, advance=False): 4586 return self.expression( 4587 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4588 ) 4589 4590 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4591 4592 def _parse_generated_as_identity( 4593 self, 4594 ) -> ( 4595 exp.GeneratedAsIdentityColumnConstraint 4596 | exp.ComputedColumnConstraint 4597 | exp.GeneratedAsRowColumnConstraint 4598 ): 4599 if self._match_text_seq("BY", "DEFAULT"): 4600 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4601 this = self.expression( 4602 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4603 ) 4604 else: 4605 self._match_text_seq("ALWAYS") 4606 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4607 4608 self._match(TokenType.ALIAS) 4609 4610 if self._match_text_seq("ROW"): 4611 start = self._match_text_seq("START") 4612 if not start: 4613 self._match(TokenType.END) 4614 hidden = self._match_text_seq("HIDDEN") 4615 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4616 4617 identity = self._match_text_seq("IDENTITY") 4618 4619 if self._match(TokenType.L_PAREN): 4620 if self._match(TokenType.START_WITH): 4621 this.set("start", self._parse_bitwise()) 4622 if self._match_text_seq("INCREMENT", "BY"): 4623 this.set("increment", self._parse_bitwise()) 4624 if self._match_text_seq("MINVALUE"): 4625 this.set("minvalue", self._parse_bitwise()) 4626 if self._match_text_seq("MAXVALUE"): 4627 this.set("maxvalue", self._parse_bitwise()) 4628 4629 if self._match_text_seq("CYCLE"): 4630 this.set("cycle", True) 4631 elif self._match_text_seq("NO", "CYCLE"): 4632 this.set("cycle", False) 4633 4634 if not identity: 4635 this.set("expression", self._parse_bitwise()) 4636 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4637 args = self._parse_csv(self._parse_bitwise) 4638 this.set("start", seq_get(args, 0)) 4639 this.set("increment", seq_get(args, 1)) 4640 4641 self._match_r_paren() 4642 4643 return this 4644 4645 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4646 self._match_text_seq("LENGTH") 4647 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4648 4649 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4650 if self._match_text_seq("NULL"): 4651 return self.expression(exp.NotNullColumnConstraint) 4652 if self._match_text_seq("CASESPECIFIC"): 4653 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4654 if self._match_text_seq("FOR", "REPLICATION"): 4655 return self.expression(exp.NotForReplicationColumnConstraint) 4656 return None 4657 4658 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4659 if self._match(TokenType.CONSTRAINT): 4660 this = self._parse_id_var() 4661 else: 4662 this = None 4663 4664 if self._match_texts(self.CONSTRAINT_PARSERS): 4665 return self.expression( 4666 exp.ColumnConstraint, 4667 this=this, 4668 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4669 ) 4670 4671 return this 4672 4673 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4674 if not self._match(TokenType.CONSTRAINT): 4675 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4676 4677 return self.expression( 4678 exp.Constraint, 4679 this=self._parse_id_var(), 4680 expressions=self._parse_unnamed_constraints(), 4681 ) 4682 4683 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4684 constraints = [] 4685 while True: 4686 constraint = self._parse_unnamed_constraint() or self._parse_function() 4687 if not constraint: 4688 break 4689 constraints.append(constraint) 4690 4691 return constraints 4692 4693 def _parse_unnamed_constraint( 4694 self, constraints: t.Optional[t.Collection[str]] = None 4695 ) -> t.Optional[exp.Expression]: 4696 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4697 constraints or self.CONSTRAINT_PARSERS 4698 ): 4699 return None 4700 4701 constraint = self._prev.text.upper() 4702 if constraint not in self.CONSTRAINT_PARSERS: 4703 
self.raise_error(f"No parser found for schema constraint {constraint}.") 4704 4705 return self.CONSTRAINT_PARSERS[constraint](self) 4706 4707 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4708 self._match_text_seq("KEY") 4709 return self.expression( 4710 exp.UniqueColumnConstraint, 4711 this=self._parse_schema(self._parse_id_var(any_token=False)), 4712 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4713 on_conflict=self._parse_on_conflict(), 4714 ) 4715 4716 def _parse_key_constraint_options(self) -> t.List[str]: 4717 options = [] 4718 while True: 4719 if not self._curr: 4720 break 4721 4722 if self._match(TokenType.ON): 4723 action = None 4724 on = self._advance_any() and self._prev.text 4725 4726 if self._match_text_seq("NO", "ACTION"): 4727 action = "NO ACTION" 4728 elif self._match_text_seq("CASCADE"): 4729 action = "CASCADE" 4730 elif self._match_text_seq("RESTRICT"): 4731 action = "RESTRICT" 4732 elif self._match_pair(TokenType.SET, TokenType.NULL): 4733 action = "SET NULL" 4734 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4735 action = "SET DEFAULT" 4736 else: 4737 self.raise_error("Invalid key constraint") 4738 4739 options.append(f"ON {on} {action}") 4740 elif self._match_text_seq("NOT", "ENFORCED"): 4741 options.append("NOT ENFORCED") 4742 elif self._match_text_seq("DEFERRABLE"): 4743 options.append("DEFERRABLE") 4744 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4745 options.append("INITIALLY DEFERRED") 4746 elif self._match_text_seq("NORELY"): 4747 options.append("NORELY") 4748 elif self._match_text_seq("MATCH", "FULL"): 4749 options.append("MATCH FULL") 4750 else: 4751 break 4752 4753 return options 4754 4755 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4756 if match and not self._match(TokenType.REFERENCES): 4757 return None 4758 4759 expressions = None 4760 this = self._parse_table(schema=True) 4761 options = self._parse_key_constraint_options() 4762 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4763 4764 def _parse_foreign_key(self) -> exp.ForeignKey: 4765 expressions = self._parse_wrapped_id_vars() 4766 reference = self._parse_references() 4767 options = {} 4768 4769 while self._match(TokenType.ON): 4770 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4771 self.raise_error("Expected DELETE or UPDATE") 4772 4773 kind = self._prev.text.lower() 4774 4775 if self._match_text_seq("NO", "ACTION"): 4776 action = "NO ACTION" 4777 elif self._match(TokenType.SET): 4778 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4779 action = "SET " + self._prev.text.upper() 4780 else: 4781 self._advance() 4782 action = self._prev.text.upper() 4783 4784 options[kind] = action 4785 4786 return self.expression( 4787 exp.ForeignKey, 4788 expressions=expressions, 4789 reference=reference, 4790 **options, # type: ignore 4791 ) 4792 4793 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4794 return self._parse_field() 4795 4796 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4797 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4798 self._retreat(self._index - 1) 4799 return None 4800 4801 id_vars = self._parse_wrapped_id_vars() 4802 return self.expression( 4803 exp.PeriodForSystemTimeConstraint, 4804 this=seq_get(id_vars, 0), 4805 expression=seq_get(id_vars, 1), 4806 ) 4807 4808 def _parse_primary_key( 4809 self, wrapped_optional: bool = False, in_props: bool = False 4810 ) -> 
exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4811 desc = ( 4812 self._match_set((TokenType.ASC, TokenType.DESC)) 4813 and self._prev.token_type == TokenType.DESC 4814 ) 4815 4816 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4817 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4818 4819 expressions = self._parse_wrapped_csv( 4820 self._parse_primary_key_part, optional=wrapped_optional 4821 ) 4822 options = self._parse_key_constraint_options() 4823 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4824 4825 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4826 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4827 4828 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4829 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4830 return this 4831 4832 bracket_kind = self._prev.token_type 4833 expressions = self._parse_csv( 4834 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4835 ) 4836 4837 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 4838 self.raise_error("Expected ]") 4839 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 4840 self.raise_error("Expected }") 4841 4842 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4843 if bracket_kind == TokenType.L_BRACE: 4844 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 4845 elif not this or this.name.upper() == "ARRAY": 4846 this = self.expression(exp.Array, expressions=expressions) 4847 else: 4848 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4849 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4850 4851 self._add_comments(this) 4852 return self._parse_bracket(this) 4853 4854 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4855 if self._match(TokenType.COLON): 4856 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4857 return this 4858 4859 def _parse_case(self) -> t.Optional[exp.Expression]: 4860 ifs = [] 4861 default = None 4862 4863 comments = self._prev_comments 4864 expression = self._parse_conjunction() 4865 4866 while self._match(TokenType.WHEN): 4867 this = self._parse_conjunction() 4868 self._match(TokenType.THEN) 4869 then = self._parse_conjunction() 4870 ifs.append(self.expression(exp.If, this=this, true=then)) 4871 4872 if self._match(TokenType.ELSE): 4873 default = self._parse_conjunction() 4874 4875 if not self._match(TokenType.END): 4876 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 4877 default = exp.column("interval") 4878 else: 4879 self.raise_error("Expected END after CASE", self._prev) 4880 4881 return self.expression( 4882 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 4883 ) 4884 4885 def _parse_if(self) -> t.Optional[exp.Expression]: 4886 if self._match(TokenType.L_PAREN): 4887 args = self._parse_csv(self._parse_conjunction) 4888 this = self.validate_expression(exp.If.from_arg_list(args), args) 4889 self._match_r_paren() 4890 else: 4891 index = self._index - 1 4892 4893 if self.NO_PAREN_IF_COMMANDS and index == 0: 4894 return self._parse_as_command(self._prev) 4895 4896 condition = self._parse_conjunction() 4897 4898 if not condition: 4899 self._retreat(index) 4900 return None 4901 
4902 self._match(TokenType.THEN) 4903 true = self._parse_conjunction() 4904 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 4905 self._match(TokenType.END) 4906 this = self.expression(exp.If, this=condition, true=true, false=false) 4907 4908 return this 4909 4910 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 4911 if not self._match_text_seq("VALUE", "FOR"): 4912 self._retreat(self._index - 1) 4913 return None 4914 4915 return self.expression( 4916 exp.NextValueFor, 4917 this=self._parse_column(), 4918 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 4919 ) 4920 4921 def _parse_extract(self) -> exp.Extract: 4922 this = self._parse_function() or self._parse_var() or self._parse_type() 4923 4924 if self._match(TokenType.FROM): 4925 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4926 4927 if not self._match(TokenType.COMMA): 4928 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 4929 4930 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 4931 4932 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 4933 this = self._parse_conjunction() 4934 4935 if not self._match(TokenType.ALIAS): 4936 if self._match(TokenType.COMMA): 4937 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 4938 4939 self.raise_error("Expected AS after CAST") 4940 4941 fmt = None 4942 to = self._parse_types() 4943 4944 if self._match(TokenType.FORMAT): 4945 fmt_string = self._parse_string() 4946 fmt = self._parse_at_time_zone(fmt_string) 4947 4948 if not to: 4949 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 4950 if to.this in exp.DataType.TEMPORAL_TYPES: 4951 this = self.expression( 4952 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 4953 this=this, 4954 format=exp.Literal.string( 4955 format_time( 4956 fmt_string.this if fmt_string else "", 4957 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 4958 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 4959 ) 4960 ), 4961 ) 4962 4963 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 4964 this.set("zone", fmt.args["zone"]) 4965 return this 4966 elif not to: 4967 self.raise_error("Expected TYPE after CAST") 4968 elif isinstance(to, exp.Identifier): 4969 to = exp.DataType.build(to.name, udt=True) 4970 elif to.this == exp.DataType.Type.CHAR: 4971 if self._match(TokenType.CHARACTER_SET): 4972 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 4973 4974 return self.expression( 4975 exp.Cast if strict else exp.TryCast, 4976 this=this, 4977 to=to, 4978 format=fmt, 4979 safe=safe, 4980 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 4981 ) 4982 4983 def _parse_string_agg(self) -> exp.Expression: 4984 if self._match(TokenType.DISTINCT): 4985 args: t.List[t.Optional[exp.Expression]] = [ 4986 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 4987 ] 4988 if self._match(TokenType.COMMA): 4989 args.extend(self._parse_csv(self._parse_conjunction)) 4990 else: 4991 args = self._parse_csv(self._parse_conjunction) # type: ignore 4992 4993 index = self._index 4994 if not self._match(TokenType.R_PAREN) and args: 4995 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 4996 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... 
]] [LIMIT n]) 4997 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 4998 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 4999 5000 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5001 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5002 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5003 if not self._match_text_seq("WITHIN", "GROUP"): 5004 self._retreat(index) 5005 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5006 5007 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5008 order = self._parse_order(this=seq_get(args, 0)) 5009 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5010 5011 def _parse_convert( 5012 self, strict: bool, safe: t.Optional[bool] = None 5013 ) -> t.Optional[exp.Expression]: 5014 this = self._parse_bitwise() 5015 5016 if self._match(TokenType.USING): 5017 to: t.Optional[exp.Expression] = self.expression( 5018 exp.CharacterSet, this=self._parse_var() 5019 ) 5020 elif self._match(TokenType.COMMA): 5021 to = self._parse_types() 5022 else: 5023 to = None 5024 5025 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5026 5027 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5028 """ 5029 There are generally two variants of the DECODE function: 5030 5031 - DECODE(bin, charset) 5032 - DECODE(expression, search, result [, search, result] ... [, default]) 5033 5034 The second variant will always be parsed into a CASE expression. Note that NULL 5035 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5036 instead of relying on pattern matching. 
5037 """ 5038 args = self._parse_csv(self._parse_conjunction) 5039 5040 if len(args) < 3: 5041 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5042 5043 expression, *expressions = args 5044 if not expression: 5045 return None 5046 5047 ifs = [] 5048 for search, result in zip(expressions[::2], expressions[1::2]): 5049 if not search or not result: 5050 return None 5051 5052 if isinstance(search, exp.Literal): 5053 ifs.append( 5054 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5055 ) 5056 elif isinstance(search, exp.Null): 5057 ifs.append( 5058 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5059 ) 5060 else: 5061 cond = exp.or_( 5062 exp.EQ(this=expression.copy(), expression=search), 5063 exp.and_( 5064 exp.Is(this=expression.copy(), expression=exp.Null()), 5065 exp.Is(this=search.copy(), expression=exp.Null()), 5066 copy=False, 5067 ), 5068 copy=False, 5069 ) 5070 ifs.append(exp.If(this=cond, true=result)) 5071 5072 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5073 5074 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5075 self._match_text_seq("KEY") 5076 key = self._parse_column() 5077 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5078 self._match_text_seq("VALUE") 5079 value = self._parse_bitwise() 5080 5081 if not key and not value: 5082 return None 5083 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5084 5085 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5086 if not this or not self._match_text_seq("FORMAT", "JSON"): 5087 return this 5088 5089 return self.expression(exp.FormatJson, this=this) 5090 5091 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5092 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5093 for value in values: 5094 if self._match_text_seq(value, "ON", on): 5095 return f"{value} ON {on}" 5096 5097 return None 5098 5099 @t.overload 5100 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5101 5102 @t.overload 5103 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5104 5105 def _parse_json_object(self, agg=False): 5106 star = self._parse_star() 5107 expressions = ( 5108 [star] 5109 if star 5110 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5111 ) 5112 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5113 5114 unique_keys = None 5115 if self._match_text_seq("WITH", "UNIQUE"): 5116 unique_keys = True 5117 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5118 unique_keys = False 5119 5120 self._match_text_seq("KEYS") 5121 5122 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5123 self._parse_type() 5124 ) 5125 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5126 5127 return self.expression( 5128 exp.JSONObjectAgg if agg else exp.JSONObject, 5129 expressions=expressions, 5130 null_handling=null_handling, 5131 unique_keys=unique_keys, 5132 return_type=return_type, 5133 encoding=encoding, 5134 ) 5135 5136 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5137 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5138 if not self._match_text_seq("NESTED"): 5139 this = self._parse_id_var() 5140 kind = self._parse_types(allow_identifiers=False) 5141 nested = None 5142 else: 5143 this = None 5144 kind = None 5145 nested = True 5146 5147 path = self._match_text_seq("PATH") and self._parse_string() 5148 nested_schema = nested and self._parse_json_schema() 5149 5150 return self.expression( 5151 exp.JSONColumnDef, 5152 this=this, 5153 kind=kind, 5154 path=path, 5155 nested_schema=nested_schema, 5156 ) 5157 5158 def _parse_json_schema(self) -> exp.JSONSchema: 5159 self._match_text_seq("COLUMNS") 5160 return self.expression( 5161 exp.JSONSchema, 5162 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5163 ) 5164 5165 def _parse_json_table(self) -> exp.JSONTable: 5166 this = self._parse_format_json(self._parse_bitwise()) 5167 path = self._match(TokenType.COMMA) and self._parse_string() 5168 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5169 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5170 schema = self._parse_json_schema() 5171 5172 return exp.JSONTable( 5173 this=this, 5174 schema=schema, 5175 path=path, 5176 error_handling=error_handling, 5177 empty_handling=empty_handling, 5178 ) 5179 5180 def _parse_match_against(self) -> exp.MatchAgainst: 5181 expressions = self._parse_csv(self._parse_column) 5182 5183 self._match_text_seq(")", "AGAINST", "(") 5184 5185 this = self._parse_string() 5186 5187 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5188 modifier = "IN NATURAL LANGUAGE MODE" 5189 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5190 modifier = f"{modifier} WITH QUERY EXPANSION" 5191 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5192 modifier = "IN BOOLEAN MODE" 5193 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5194 modifier = "WITH QUERY EXPANSION" 5195 else: 5196 modifier = None 5197 5198 return self.expression( 5199 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5200 ) 5201 5202 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5203 def _parse_open_json(self) -> exp.OpenJSON: 5204 this = self._parse_bitwise() 5205 path = self._match(TokenType.COMMA) and self._parse_string() 5206 5207 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5208 this = self._parse_field(any_token=True) 5209 kind = self._parse_types() 5210 path = 
self._parse_string() 5211 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5212 5213 return self.expression( 5214 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5215 ) 5216 5217 expressions = None 5218 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5219 self._match_l_paren() 5220 expressions = self._parse_csv(_parse_open_json_column_def) 5221 5222 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5223 5224 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5225 args = self._parse_csv(self._parse_bitwise) 5226 5227 if self._match(TokenType.IN): 5228 return self.expression( 5229 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5230 ) 5231 5232 if haystack_first: 5233 haystack = seq_get(args, 0) 5234 needle = seq_get(args, 1) 5235 else: 5236 needle = seq_get(args, 0) 5237 haystack = seq_get(args, 1) 5238 5239 return self.expression( 5240 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5241 ) 5242 5243 def _parse_predict(self) -> exp.Predict: 5244 self._match_text_seq("MODEL") 5245 this = self._parse_table() 5246 5247 self._match(TokenType.COMMA) 5248 self._match_text_seq("TABLE") 5249 5250 return self.expression( 5251 exp.Predict, 5252 this=this, 5253 expression=self._parse_table(), 5254 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5255 ) 5256 5257 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5258 args = self._parse_csv(self._parse_table) 5259 return exp.JoinHint(this=func_name.upper(), expressions=args) 5260 5261 def _parse_substring(self) -> exp.Substring: 5262 # Postgres supports the form: substring(string [from int] [for int]) 5263 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5264 5265 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5266 5267 if self._match(TokenType.FROM): 5268 args.append(self._parse_bitwise()) 5269 if self._match(TokenType.FOR): 5270 args.append(self._parse_bitwise()) 5271 5272 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5273 5274 def _parse_trim(self) -> exp.Trim: 5275 # https://www.w3resource.com/sql/character-functions/trim.php 5276 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5277 5278 position = None 5279 collation = None 5280 expression = None 5281 5282 if self._match_texts(self.TRIM_TYPES): 5283 position = self._prev.text.upper() 5284 5285 this = self._parse_bitwise() 5286 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5287 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5288 expression = self._parse_bitwise() 5289 5290 if invert_order: 5291 this, expression = expression, this 5292 5293 if self._match(TokenType.COLLATE): 5294 collation = self._parse_bitwise() 5295 5296 return self.expression( 5297 exp.Trim, this=this, position=position, expression=expression, collation=collation 5298 ) 5299 5300 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5301 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5302 5303 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5304 return self._parse_window(self._parse_id_var(), alias=True) 5305 5306 def _parse_respect_or_ignore_nulls( 5307 self, this: t.Optional[exp.Expression] 5308 ) -> t.Optional[exp.Expression]: 5309 if self._match_text_seq("IGNORE", "NULLS"): 5310 return self.expression(exp.IgnoreNulls, this=this) 5311 if 
self._match_text_seq("RESPECT", "NULLS"): 5312 return self.expression(exp.RespectNulls, this=this) 5313 return this 5314 5315 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5316 if self._match(TokenType.HAVING): 5317 self._match_texts(("MAX", "MIN")) 5318 max = self._prev.text.upper() != "MIN" 5319 return self.expression( 5320 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5321 ) 5322 5323 return this 5324 5325 def _parse_window( 5326 self, this: t.Optional[exp.Expression], alias: bool = False 5327 ) -> t.Optional[exp.Expression]: 5328 func = this 5329 comments = func.comments if isinstance(func, exp.Expression) else None 5330 5331 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5332 self._match(TokenType.WHERE) 5333 this = self.expression( 5334 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5335 ) 5336 self._match_r_paren() 5337 5338 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5339 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5340 if self._match_text_seq("WITHIN", "GROUP"): 5341 order = self._parse_wrapped(self._parse_order) 5342 this = self.expression(exp.WithinGroup, this=this, expression=order) 5343 5344 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5345 # Some dialects choose to implement and some do not. 5346 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5347 5348 # There is some code above in _parse_lambda that handles 5349 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5350 5351 # The below changes handle 5352 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5353 5354 # Oracle allows both formats 5355 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5356 # and Snowflake chose to do the same for familiarity 5357 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5358 if isinstance(this, exp.AggFunc): 5359 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5360 5361 if ignore_respect and ignore_respect is not this: 5362 ignore_respect.replace(ignore_respect.this) 5363 this = self.expression(ignore_respect.__class__, this=this) 5364 5365 this = self._parse_respect_or_ignore_nulls(this) 5366 5367 # bigquery select from window x AS (partition by ...) 
5368 if alias: 5369 over = None 5370 self._match(TokenType.ALIAS) 5371 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5372 return this 5373 else: 5374 over = self._prev.text.upper() 5375 5376 if comments: 5377 func.comments = None # type: ignore 5378 5379 if not self._match(TokenType.L_PAREN): 5380 return self.expression( 5381 exp.Window, 5382 comments=comments, 5383 this=this, 5384 alias=self._parse_id_var(False), 5385 over=over, 5386 ) 5387 5388 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5389 5390 first = self._match(TokenType.FIRST) 5391 if self._match_text_seq("LAST"): 5392 first = False 5393 5394 partition, order = self._parse_partition_and_order() 5395 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5396 5397 if kind: 5398 self._match(TokenType.BETWEEN) 5399 start = self._parse_window_spec() 5400 self._match(TokenType.AND) 5401 end = self._parse_window_spec() 5402 5403 spec = self.expression( 5404 exp.WindowSpec, 5405 kind=kind, 5406 start=start["value"], 5407 start_side=start["side"], 5408 end=end["value"], 5409 end_side=end["side"], 5410 ) 5411 else: 5412 spec = None 5413 5414 self._match_r_paren() 5415 5416 window = self.expression( 5417 exp.Window, 5418 comments=comments, 5419 this=this, 5420 partition_by=partition, 5421 order=order, 5422 spec=spec, 5423 alias=window_alias, 5424 over=over, 5425 first=first, 5426 ) 5427 5428 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5429 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5430 return self._parse_window(window, alias=alias) 5431 5432 return window 5433 5434 def _parse_partition_and_order( 5435 self, 5436 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5437 return self._parse_partition_by(), self._parse_order() 5438 5439 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5440 self._match(TokenType.BETWEEN) 5441 5442 return { 5443 "value": ( 5444 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5445 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5446 or self._parse_bitwise() 5447 ), 5448 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5449 } 5450 5451 def _parse_alias( 5452 self, this: t.Optional[exp.Expression], explicit: bool = False 5453 ) -> t.Optional[exp.Expression]: 5454 any_token = self._match(TokenType.ALIAS) 5455 comments = self._prev_comments 5456 5457 if explicit and not any_token: 5458 return this 5459 5460 if self._match(TokenType.L_PAREN): 5461 aliases = self.expression( 5462 exp.Aliases, 5463 comments=comments, 5464 this=this, 5465 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5466 ) 5467 self._match_r_paren(aliases) 5468 return aliases 5469 5470 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5471 self.STRING_ALIASES and self._parse_string_as_identifier() 5472 ) 5473 5474 if alias: 5475 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5476 column = this.this 5477 5478 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5479 if not this.comments and column and column.comments: 5480 this.comments = column.comments 5481 column.comments = None 5482 5483 return this 5484 5485 def _parse_id_var( 5486 self, 5487 any_token: bool = True, 5488 tokens: t.Optional[t.Collection[TokenType]] = None, 5489 ) -> t.Optional[exp.Expression]: 5490 identifier = self._parse_identifier() 5491 5492 if identifier: 5493 return identifier 5494 
5495 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 5496 quoted = self._prev.token_type == TokenType.STRING 5497 return exp.Identifier(this=self._prev.text, quoted=quoted) 5498 5499 return None 5500 5501 def _parse_string(self) -> t.Optional[exp.Expression]: 5502 if self._match_set(self.STRING_PARSERS): 5503 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5504 return self._parse_placeholder() 5505 5506 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5507 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5508 5509 def _parse_number(self) -> t.Optional[exp.Expression]: 5510 if self._match_set(self.NUMERIC_PARSERS): 5511 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5512 return self._parse_placeholder() 5513 5514 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5515 if self._match(TokenType.IDENTIFIER): 5516 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5517 return self._parse_placeholder() 5518 5519 def _parse_var( 5520 self, 5521 any_token: bool = False, 5522 tokens: t.Optional[t.Collection[TokenType]] = None, 5523 upper: bool = False, 5524 ) -> t.Optional[exp.Expression]: 5525 if ( 5526 (any_token and self._advance_any()) 5527 or self._match(TokenType.VAR) 5528 or (self._match_set(tokens) if tokens else False) 5529 ): 5530 return self.expression( 5531 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5532 ) 5533 return self._parse_placeholder() 5534 5535 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5536 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5537 self._advance() 5538 return self._prev 5539 return None 5540 5541 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5542 return self._parse_var() or self._parse_string() 5543 5544 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5545 return self._parse_primary() or self._parse_var(any_token=True) 5546 5547 def _parse_null(self) -> t.Optional[exp.Expression]: 5548 if self._match_set(self.NULL_TOKENS): 5549 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5550 return self._parse_placeholder() 5551 5552 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5553 if self._match(TokenType.TRUE): 5554 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5555 if self._match(TokenType.FALSE): 5556 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5557 return self._parse_placeholder() 5558 5559 def _parse_star(self) -> t.Optional[exp.Expression]: 5560 if self._match(TokenType.STAR): 5561 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5562 return self._parse_placeholder() 5563 5564 def _parse_parameter(self) -> exp.Parameter: 5565 self._match(TokenType.L_BRACE) 5566 this = self._parse_identifier() or self._parse_primary_or_var() 5567 expression = self._match(TokenType.COLON) and ( 5568 self._parse_identifier() or self._parse_primary_or_var() 5569 ) 5570 self._match(TokenType.R_BRACE) 5571 return self.expression(exp.Parameter, this=this, expression=expression) 5572 5573 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5574 if self._match_set(self.PLACEHOLDER_PARSERS): 5575 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5576 if placeholder: 5577 return placeholder 5578 self._advance(-1) 5579 return None 5580 5581 def _parse_except(self) -> t.Optional[t.List[exp.Expression]]: 
5582 if not self._match(TokenType.EXCEPT): 5583 return None 5584 if self._match(TokenType.L_PAREN, advance=False): 5585 return self._parse_wrapped_csv(self._parse_column) 5586 5587 except_column = self._parse_column() 5588 return [except_column] if except_column else None 5589 5590 def _parse_replace(self) -> t.Optional[t.List[exp.Expression]]: 5591 if not self._match(TokenType.REPLACE): 5592 return None 5593 if self._match(TokenType.L_PAREN, advance=False): 5594 return self._parse_wrapped_csv(self._parse_expression) 5595 5596 replace_expression = self._parse_expression() 5597 return [replace_expression] if replace_expression else None 5598 5599 def _parse_csv( 5600 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5601 ) -> t.List[exp.Expression]: 5602 parse_result = parse_method() 5603 items = [parse_result] if parse_result is not None else [] 5604 5605 while self._match(sep): 5606 self._add_comments(parse_result) 5607 parse_result = parse_method() 5608 if parse_result is not None: 5609 items.append(parse_result) 5610 5611 return items 5612 5613 def _parse_tokens( 5614 self, parse_method: t.Callable, expressions: t.Dict 5615 ) -> t.Optional[exp.Expression]: 5616 this = parse_method() 5617 5618 while self._match_set(expressions): 5619 this = self.expression( 5620 expressions[self._prev.token_type], 5621 this=this, 5622 comments=self._prev_comments, 5623 expression=parse_method(), 5624 ) 5625 5626 return this 5627 5628 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5629 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5630 5631 def _parse_wrapped_csv( 5632 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5633 ) -> t.List[exp.Expression]: 5634 return self._parse_wrapped( 5635 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5636 ) 5637 5638 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5639 wrapped = self._match(TokenType.L_PAREN) 5640 if not wrapped and not optional: 5641 self.raise_error("Expecting (") 5642 parse_result = parse_method() 5643 if wrapped: 5644 self._match_r_paren() 5645 return parse_result 5646 5647 def _parse_expressions(self) -> t.List[exp.Expression]: 5648 return self._parse_csv(self._parse_expression) 5649 5650 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5651 return self._parse_select() or self._parse_set_operations( 5652 self._parse_expression() if alias else self._parse_conjunction() 5653 ) 5654 5655 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5656 return self._parse_query_modifiers( 5657 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5658 ) 5659 5660 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5661 this = None 5662 if self._match_texts(self.TRANSACTION_KIND): 5663 this = self._prev.text 5664 5665 self._match_texts(("TRANSACTION", "WORK")) 5666 5667 modes = [] 5668 while True: 5669 mode = [] 5670 while self._match(TokenType.VAR): 5671 mode.append(self._prev.text) 5672 5673 if mode: 5674 modes.append(" ".join(mode)) 5675 if not self._match(TokenType.COMMA): 5676 break 5677 5678 return self.expression(exp.Transaction, this=this, modes=modes) 5679 5680 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5681 chain = None 5682 savepoint = None 5683 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5684 5685 self._match_texts(("TRANSACTION", "WORK")) 5686 5687 if 
self._match_text_seq("TO"): 5688 self._match_text_seq("SAVEPOINT") 5689 savepoint = self._parse_id_var() 5690 5691 if self._match(TokenType.AND): 5692 chain = not self._match_text_seq("NO") 5693 self._match_text_seq("CHAIN") 5694 5695 if is_rollback: 5696 return self.expression(exp.Rollback, savepoint=savepoint) 5697 5698 return self.expression(exp.Commit, chain=chain) 5699 5700 def _parse_refresh(self) -> exp.Refresh: 5701 self._match(TokenType.TABLE) 5702 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 5703 5704 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5705 if not self._match_text_seq("ADD"): 5706 return None 5707 5708 self._match(TokenType.COLUMN) 5709 exists_column = self._parse_exists(not_=True) 5710 expression = self._parse_field_def() 5711 5712 if expression: 5713 expression.set("exists", exists_column) 5714 5715 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5716 if self._match_texts(("FIRST", "AFTER")): 5717 position = self._prev.text 5718 column_position = self.expression( 5719 exp.ColumnPosition, this=self._parse_column(), position=position 5720 ) 5721 expression.set("position", column_position) 5722 5723 return expression 5724 5725 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5726 drop = self._match(TokenType.DROP) and self._parse_drop() 5727 if drop and not isinstance(drop, exp.Command): 5728 drop.set("kind", drop.args.get("kind", "COLUMN")) 5729 return drop 5730 5731 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5732 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5733 return self.expression( 5734 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5735 ) 5736 5737 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5738 index = self._index - 1 5739 5740 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5741 return self._parse_csv( 5742 lambda: self.expression( 5743 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5744 ) 5745 ) 5746 5747 self._retreat(index) 5748 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5749 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5750 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5751 5752 def _parse_alter_table_alter(self) -> exp.AlterColumn: 5753 self._match(TokenType.COLUMN) 5754 column = self._parse_field(any_token=True) 5755 5756 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5757 return self.expression(exp.AlterColumn, this=column, drop=True) 5758 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5759 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5760 if self._match(TokenType.COMMENT): 5761 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5762 5763 self._match_text_seq("SET", "DATA") 5764 self._match_text_seq("TYPE") 5765 return self.expression( 5766 exp.AlterColumn, 5767 this=column, 5768 dtype=self._parse_types(), 5769 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5770 using=self._match(TokenType.USING) and self._parse_conjunction(), 5771 ) 5772 5773 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5774 index = self._index - 1 5775 5776 partition_exists = self._parse_exists() 5777 if self._match(TokenType.PARTITION, advance=False): 5778 return self._parse_csv(lambda: 
self._parse_drop_partition(exists=partition_exists)) 5779 5780 self._retreat(index) 5781 return self._parse_csv(self._parse_drop_column) 5782 5783 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5784 if self._match(TokenType.COLUMN): 5785 exists = self._parse_exists() 5786 old_column = self._parse_column() 5787 to = self._match_text_seq("TO") 5788 new_column = self._parse_column() 5789 5790 if old_column is None or to is None or new_column is None: 5791 return None 5792 5793 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5794 5795 self._match_text_seq("TO") 5796 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5797 5798 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5799 start = self._prev 5800 5801 if not self._match(TokenType.TABLE): 5802 return self._parse_as_command(start) 5803 5804 exists = self._parse_exists() 5805 only = self._match_text_seq("ONLY") 5806 this = self._parse_table(schema=True) 5807 5808 if self._next: 5809 self._advance() 5810 5811 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5812 if parser: 5813 actions = ensure_list(parser(self)) 5814 options = self._parse_csv(self._parse_property) 5815 5816 if not self._curr and actions: 5817 return self.expression( 5818 exp.AlterTable, 5819 this=this, 5820 exists=exists, 5821 actions=actions, 5822 only=only, 5823 options=options, 5824 ) 5825 5826 return self._parse_as_command(start) 5827 5828 def _parse_merge(self) -> exp.Merge: 5829 self._match(TokenType.INTO) 5830 target = self._parse_table() 5831 5832 if target and self._match(TokenType.ALIAS, advance=False): 5833 target.set("alias", self._parse_table_alias()) 5834 5835 self._match(TokenType.USING) 5836 using = self._parse_table() 5837 5838 self._match(TokenType.ON) 5839 on = self._parse_conjunction() 5840 5841 return self.expression( 5842 exp.Merge, 5843 this=target, 5844 using=using, 5845 on=on, 5846 expressions=self._parse_when_matched(), 5847 ) 5848 5849 def _parse_when_matched(self) -> t.List[exp.When]: 5850 whens = [] 5851 5852 while self._match(TokenType.WHEN): 5853 matched = not self._match(TokenType.NOT) 5854 self._match_text_seq("MATCHED") 5855 source = ( 5856 False 5857 if self._match_text_seq("BY", "TARGET") 5858 else self._match_text_seq("BY", "SOURCE") 5859 ) 5860 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 5861 5862 self._match(TokenType.THEN) 5863 5864 if self._match(TokenType.INSERT): 5865 _this = self._parse_star() 5866 if _this: 5867 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 5868 else: 5869 then = self.expression( 5870 exp.Insert, 5871 this=self._parse_value(), 5872 expression=self._match_text_seq("VALUES") and self._parse_value(), 5873 ) 5874 elif self._match(TokenType.UPDATE): 5875 expressions = self._parse_star() 5876 if expressions: 5877 then = self.expression(exp.Update, expressions=expressions) 5878 else: 5879 then = self.expression( 5880 exp.Update, 5881 expressions=self._match(TokenType.SET) 5882 and self._parse_csv(self._parse_equality), 5883 ) 5884 elif self._match(TokenType.DELETE): 5885 then = self.expression(exp.Var, this=self._prev.text) 5886 else: 5887 then = None 5888 5889 whens.append( 5890 self.expression( 5891 exp.When, 5892 matched=matched, 5893 source=source, 5894 condition=condition, 5895 then=then, 5896 ) 5897 ) 5898 return whens 5899 5900 def _parse_show(self) -> t.Optional[exp.Expression]: 5901 parser = 
self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_conjunction() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)
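The SET and TRUNCATE machinery above surfaces directly through the public API. A minimal sketch, assuming a sqlglot version that includes exp.TruncateTable (as the source above does):

import sqlglot
from sqlglot import exp

# SET parses into exp.Set; each assignment becomes a SetItem wrapping an EQ node.
set_stmt = sqlglot.parse_one("SET x = 1")
assert isinstance(set_stmt, exp.Set)

# TRUNCATE TABLE parses into exp.TruncateTable, capturing the identity and option clauses.
trunc = sqlglot.parse_one("TRUNCATE TABLE t1, t2 RESTART IDENTITY CASCADE")
assert isinstance(trunc, exp.TruncateTable)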
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
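A minimal construction sketch tying these options together; the token list comes from the public Tokenizer in sqlglot.tokens (the values shown are the defaults):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

# Collect errors and raise them as a single ParseError, keeping at most 3 messages.
parser = Parser(error_level=ErrorLevel.RAISE, error_message_context=100, max_errors=3)
tokens = Tokenizer().tokenize("SELECT a FROM t")
trees = parser.parse(tokens, sql="SELECT a FROM t")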
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
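For example, a multi-statement input yields one tree per statement (a small sketch using the default Tokenizer and Parser):

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1; SELECT 2"
trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
assert len(trees) == 2  # one syntax tree per parsed statement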
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of produced syntax trees, parsed into the target Expression type.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of produced syntax trees, parsed into the target Expression type.
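A usage sketch, assuming exp.Select is registered in EXPRESSION_PARSERS (it is in the default Parser):

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t"
tree = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql=sql)[0]
assert isinstance(tree, exp.Select)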
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
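A small illustration of how the error level routes raise_error and check_errors; setting parser.sql by hand is purely for demonstration, since _parse normally populates it:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

parser = Parser(error_level=ErrorLevel.WARN)
parser.sql = "SELECT 1"                # raise_error slices its context out of this string
parser.raise_error("example problem")  # WARN: appended to parser.errors, not raised
parser.check_errors()                  # WARN: logs each recorded error via the sqlglot logger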
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
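A standalone sketch of building a validated node outside of a parse run, using the exp.column and exp.Literal helpers:

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()
node = parser.expression(exp.EQ, this=exp.column("a"), expression=exp.Literal.number(1))
assert node.sql() == "a = 1"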
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
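For example, validating a node that is missing a mandatory argument raises under the default error level (a sketch; exp.Cast requires a "to" argument):

from sqlglot import exp
from sqlglot.errors import ParseError
from sqlglot.parser import Parser

parser = Parser()  # ErrorLevel.IMMEDIATE by default, so validation raises right away
try:
    parser.validate_expression(exp.Cast(this=exp.column("x")))  # mandatory "to" is missing
except ParseError as e:
    print(e)  # Required keyword: 'to' missing for Cast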