sqlglot.dialects.clickhouse
# ClickHouse dialect for sqlglot: tokenizer, parser and generator overrides.

from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    arg_max_or_min_no_count,
    build_date_delta,
    build_formatted_time,
    inline_array_sql,
    json_extract_segments,
    json_path_key_only_name,
    no_pivot_sql,
    build_json_extract_path,
    rename_func,
    sha256_sql,
    var_map_sql,
    timestamptrunc_sql,
    unit_to_var,
)
from sqlglot.generator import Generator
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import Token, TokenType

# Union of the date/time arithmetic expressions that share ClickHouse's
# <func>(unit, delta, datetime) argument order (see _datetime_delta_sql).
DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd
]

# Backward-compatibility alias: the previous name was accidentally spelled with
# Greek homoglyph letters (Τ/Ι/Μ instead of T/I/M), which renders identically in
# most fonts and is impossible to type on a Latin keyboard. Prefer DATETIME_DELTA.
DATEΤΙΜΕ_DELTA = DATETIME_DELTA


def _build_date_format(args: t.List) -> exp.TimeToStr:
    """Build a TimeToStr node from DATE_FORMAT/formatDateTime args, preserving
    the optional third (timezone) argument."""
    expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args)

    timezone = seq_get(args, 2)
    if timezone:
        expr.set("timezone", timezone)

    return expr


def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str:
    """Render UnixToTime with the fromUnixTimestamp* family, selecting the
    variant that matches the timestamp's scale (seconds/millis/micros/nanos)."""
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MILLIS:
        return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MICROS:
        return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.NANOS:
        return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT))

    # Unknown scale: normalize the value to seconds by dividing by 10^scale.
    return self.func(
        "fromUnixTimestamp",
        exp.cast(
            exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
        ),
    )


def _lower_func(sql: str) -> str:
    """Lowercase the function name portion of a rendered call, e.g. MAP(...) -> map(...)."""
    index = sql.index("(")
    return sql[:index].lower() + sql[index:]


def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str:
    """Render Quantile as quantile(q)(expr), or quantiles(q1, q2, ...)(expr)
    when the quantile argument is an array."""
    quantile = expression.args["quantile"]
    args = f"({self.sql(expression, 'this')})"

    if isinstance(quantile, exp.Array):
        func = self.func("quantiles", *quantile)
    else:
        func = self.func("quantile", quantile)

    return func + args


def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc:
    """Map countIf(cond) to CountIf; with more arguments, keep it as the
    combined aggregate count + If."""
    if len(args) == 1:
        return exp.CountIf(this=seq_get(args, 0))

    return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If"))


def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATETIME_DELTA], str]:
    """Return a generator for date/time delta functions using ClickHouse's
    <name>(unit, delta, datetime) argument order; without a unit, fall back to
    a plain rename of the function."""

    def _delta_sql(self: Generator, expression: DATETIME_DELTA) -> str:
        if not expression.unit:
            return rename_func(name)(self, expression)

        return self.func(
            name,
            unit_to_var(expression),
            expression.expression,
            expression.this,
        )

    return _delta_sql


class ClickHouse(Dialect):
    NORMALIZE_FUNCTIONS: bool | str = False
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    FORCE_EARLY_ALIAS_REF_EXPANSION = True

    UNESCAPED_SEQUENCES = {
        "\\0": "\0",
    }

    class Tokenizer(tokens.Tokenizer):
        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
        IDENTIFIERS = ['"', "`"]
        STRING_ESCAPES = ["'", "\\"]
        BIT_STRINGS = [("0b", "")]
        HEX_STRINGS = [("0x", ""), ("0X", "")]
        HEREDOC_STRINGS = ["$"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ATTACH": TokenType.COMMAND,
            "DATE32": TokenType.DATE32,
            "DATETIME64": TokenType.DATETIME64,
            "DICTIONARY": TokenType.DICTIONARY,
            "ENUM8": TokenType.ENUM8,
            "ENUM16": TokenType.ENUM16,
            "FINAL": TokenType.FINAL,
            "FIXEDSTRING": TokenType.FIXEDSTRING,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
            "GLOBAL": TokenType.GLOBAL,
            "INT256": TokenType.INT256,
            "LOWCARDINALITY": TokenType.LOWCARDINALITY,
            "MAP": TokenType.MAP,
            "NESTED": TokenType.NESTED,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TUPLE": TokenType.STRUCT,
            "UINT128": TokenType.UINT128,
            "UINT16": TokenType.USMALLINT,
            "UINT256": TokenType.UINT256,
            "UINT32": TokenType.UINT,
            "UINT64": TokenType.UBIGINT,
            "UINT8": TokenType.UTINYINT,
            "IPV4": TokenType.IPV4,
            "IPV6": TokenType.IPV6,
            "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
            "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
            "SYSTEM": TokenType.COMMAND,
            "PREWHERE": TokenType.PREWHERE,
        }
        # ClickHouse has no Oracle-style hint comments.
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }

    class Parser(parser.Parser):
        # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
        # * select x from t1 union all select x from t2 limit 1;
        # * select x from t1 union all (select x from t2 limit 1);
        MODIFIERS_ATTACHED_TO_SET_OP = False
        INTERVAL_SPANS = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY": exp.AnyValue.from_arg_list,
            "ARRAYSUM": exp.ArraySum.from_arg_list,
            "COUNTIF": _build_count_if,
            "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATE_FORMAT": _build_date_format,
            "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
            "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
            "EXTRACT": exp.RegexpExtract.from_arg_list,
            "FORMATDATETIME": _build_date_format,
            "JSONEXTRACTSTRING": build_json_extract_path(
                exp.JSONExtractScalar, zero_based_indexing=False
            ),
            "MAP": parser.build_var_map,
            "MATCH": exp.RegexpLike.from_arg_list,
            "RANDCANONICAL": exp.Rand.from_arg_list,
            "TUPLE": exp.Struct.from_arg_list,
            "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "UNIQ": exp.ApproxDistinct.from_arg_list,
            "XOR": lambda args: exp.Xor(expressions=args),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }

        AGG_FUNCTIONS = {
            "count",
            "min",
            "max",
            "sum",
            "avg",
            "any",
            "stddevPop",
            "stddevSamp",
            "varPop",
            "varSamp",
            "corr",
            "covarPop",
            "covarSamp",
            "entropy",
            "exponentialMovingAverage",
            "intervalLengthSum",
            "kolmogorovSmirnovTest",
            "mannWhitneyUTest",
            "median",
            "rankCorr",
            "sumKahan",
            "studentTTest",
            "welchTTest",
            "anyHeavy",
            "anyLast",
            "boundingRatio",
            "first_value",
            "last_value",
            "argMin",
            "argMax",
            "avgWeighted",
            "topK",
            "topKWeighted",
            "deltaSum",
            "deltaSumTimestamp",
            "groupArray",
            "groupArrayLast",
            "groupUniqArray",
            "groupArrayInsertAt",
            "groupArrayMovingAvg",
            "groupArrayMovingSum",
            "groupArraySample",
            "groupBitAnd",
            "groupBitOr",
            "groupBitXor",
            "groupBitmap",
            "groupBitmapAnd",
            "groupBitmapOr",
            "groupBitmapXor",
            "sumWithOverflow",
            "sumMap",
            "minMap",
            "maxMap",
            "skewSamp",
            "skewPop",
            "kurtSamp",
            "kurtPop",
            "uniq",
            "uniqExact",
            "uniqCombined",
            "uniqCombined64",
            "uniqHLL12",
            "uniqTheta",
            "quantile",
            "quantiles",
            "quantileExact",
            "quantilesExact",
            "quantileExactLow",
            "quantilesExactLow",
            "quantileExactHigh",
            "quantilesExactHigh",
            "quantileExactWeighted",
            "quantilesExactWeighted",
            "quantileTiming",
            "quantilesTiming",
            "quantileTimingWeighted",
            "quantilesTimingWeighted",
            "quantileDeterministic",
            "quantilesDeterministic",
            "quantileTDigest",
            "quantilesTDigest",
            "quantileTDigestWeighted",
            "quantilesTDigestWeighted",
            "quantileBFloat16",
            "quantilesBFloat16",
            "quantileBFloat16Weighted",
            "quantilesBFloat16Weighted",
            "simpleLinearRegression",
            "stochasticLinearRegression",
            "stochasticLogisticRegression",
            "categoricalInformationValue",
            "contingency",
            "cramersV",
            "cramersVBiasCorrected",
            "theilsU",
            "maxIntersections",
            "maxIntersectionsPosition",
            "meanZTest",
            "quantileInterpolatedWeighted",
            "quantilesInterpolatedWeighted",
            "quantileGK",
            "quantilesGK",
            "sparkBar",
            "sumCount",
            "largestTriangleThreeBuckets",
            "histogram",
            "sequenceMatch",
            "sequenceCount",
            "windowFunnel",
            "retention",
            "uniqUpTo",
            "sequenceNextNode",
            "exponentialTimeDecayedAvg",
        }

        AGG_FUNCTIONS_SUFFIXES = [
            "If",
            "Array",
            "ArrayIf",
            "Map",
            "SimpleState",
            "State",
            "Merge",
            "MergeState",
            "ForEach",
            "Distinct",
            "OrDefault",
            "OrNull",
            "Resample",
            "ArgMin",
            "ArgMax",
        ]

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.SET,
        }

        # Maps every <base><suffix> combination (plus the bare base name) back
        # to its (base, suffix) parts, e.g. "sumIf" -> ("sum", "If").
        AGG_FUNC_MAPPING = (
            lambda functions, suffixes: {
                f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
            }
        )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
            "QUANTILE": lambda self: self._parse_quantile(),
        }

        FUNCTION_PARSERS.pop("EXTRACT")
        FUNCTION_PARSERS.pop("MATCH")

        NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
        NO_PAREN_FUNCTION_PARSERS.pop("ANY")

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
            and self._parse_in(this, is_global=True),
        }

        # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to
        # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
        COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
        COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

        JOIN_KINDS = {
            *parser.Parser.JOIN_KINDS,
            TokenType.ANY,
            TokenType.ASOF,
            TokenType.ARRAY,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.ANY,
            TokenType.ARRAY,
            TokenType.FINAL,
            TokenType.FORMAT,
            TokenType.SETTINGS,
        }

        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
            TokenType.FORMAT,
        }

        LOG_DEFAULTS_TO_LN = True

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            TokenType.SETTINGS: lambda self: (
                "settings",
                self._advance() or self._parse_csv(self._parse_assignment),
            ),
            TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "INDEX": lambda self: self._parse_index_constraint(),
            "CODEC": lambda self: self._parse_compress(),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "REPLACE": lambda self: self._parse_alter_table_replace(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "INDEX",
        }

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            """Parse an assignment, additionally supporting ClickHouse's
            `cond ? true_val : false_val` ternary operator."""
            this = super()._parse_assignment()

            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_placeholder(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            if not self._match(TokenType.L_BRACE):
                return None

            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self.raise_error("Expecting a placeholder type or 'Identifier' for tables")
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            return self.expression(exp.Placeholder, this=this, kind=kind)

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            # Tag IN expressions preceded by GLOBAL (distributed IN).
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            """Parse a table reference, additionally consuming a trailing FINAL modifier."""
            # NOTE(review): parse_partition is accepted but not forwarded to the
            # base parser — presumably intentional for ClickHouse; confirm.
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            # ClickHouse's position() always takes the haystack first.
            return super()._parse_position(haystack_first=True)

        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> exp.CTE:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte

        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            """Return (global, side, kind) join tokens, accepting ClickHouse's
            kind-before-side ordering (e.g. ANY LEFT JOIN)."""
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                join.set("global", join.args.pop("method", None))

            return join

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            """Parse a function call, re-typing anonymous calls that match
            ClickHouse's <base><suffix> aggregate combinators (see AGG_FUNC_MAPPING)."""
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                params = self._parse_func_params(func)

                kwargs: t.Dict[str, t.Any] = {
                    "this": func.this,
                    "expressions": func.expressions,
                }
                if parts[1]:
                    kwargs["parts"] = parts
                    exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in base parser, fix its
                    # type to be CH style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if that exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            """Parse the parenthesized parameter list of a parameterized
            aggregate, e.g. the (0.5) in quantile(0.5)(x)."""
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None

        def _parse_quantile(self) -> exp.Quantile:
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            # quantile(x) with no parameter list defaults to the median.
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            # ON CLUSTER <name>; retreat if CLUSTER isn't followed by an identifier.
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)
            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            # ALTER TABLE ... REPLACE PARTITION <part> FROM <table>
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()

    class Generator(generator.Generator):
        QUERY_HINTS = False
        STRUCT_DELIMITER = ("(", ")")
        NVL2_SUPPORTED = False
        TABLESAMPLE_REQUIRES_PARENS = False
        TABLESAMPLE_SIZE_IS_ROWS = False
        TABLESAMPLE_KEYWORDS = "SAMPLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        EXPLICIT_SET_OP = True
        GROUPINGS_SEP = ""
        SET_OP_MODIFIERS = False

        STRING_TYPE_MAPPING = {
            exp.DataType.Type.CHAR: "String",
            exp.DataType.Type.LONGBLOB: "String",
            exp.DataType.Type.LONGTEXT: "String",
            exp.DataType.Type.MEDIUMBLOB: "String",
            exp.DataType.Type.MEDIUMTEXT: "String",
            exp.DataType.Type.TINYBLOB: "String",
            exp.DataType.Type.TINYTEXT: "String",
            exp.DataType.Type.TEXT: "String",
            exp.DataType.Type.VARBINARY: "String",
            exp.DataType.Type.VARCHAR: "String",
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **STRING_TYPE_MAPPING,
            exp.DataType.Type.ARRAY: "Array",
            exp.DataType.Type.BIGINT: "Int64",
            exp.DataType.Type.DATE32: "Date32",
            exp.DataType.Type.DATETIME64: "DateTime64",
            exp.DataType.Type.DOUBLE: "Float64",
            exp.DataType.Type.ENUM: "Enum",
            exp.DataType.Type.ENUM8: "Enum8",
            exp.DataType.Type.ENUM16: "Enum16",
            exp.DataType.Type.FIXEDSTRING: "FixedString",
            exp.DataType.Type.FLOAT: "Float32",
            exp.DataType.Type.INT: "Int32",
            exp.DataType.Type.MEDIUMINT: "Int32",
            exp.DataType.Type.INT128: "Int128",
            exp.DataType.Type.INT256: "Int256",
            exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
            exp.DataType.Type.MAP: "Map",
            exp.DataType.Type.NESTED: "Nested",
            exp.DataType.Type.NULLABLE: "Nullable",
            exp.DataType.Type.SMALLINT: "Int16",
            exp.DataType.Type.STRUCT: "Tuple",
            exp.DataType.Type.TINYINT: "Int8",
            exp.DataType.Type.UBIGINT: "UInt64",
            exp.DataType.Type.UINT: "UInt32",
            exp.DataType.Type.UINT128: "UInt128",
            exp.DataType.Type.UINT256: "UInt256",
            exp.DataType.Type.USMALLINT: "UInt16",
            exp.DataType.Type.UTINYINT: "UInt8",
            exp.DataType.Type.IPV4: "IPv4",
            exp.DataType.Type.IPV6: "IPv6",
            exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
            exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("any"),
            exp.ApproxDistinct: rename_func("uniq"),
            exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
            exp.ArraySize: rename_func("LENGTH"),
            exp.ArraySum: rename_func("arraySum"),
            exp.ArgMax: arg_max_or_min_no_count("argMax"),
            exp.ArgMin: arg_max_or_min_no_count("argMin"),
            exp.Array: inline_array_sql,
            exp.CastToStrType: rename_func("CAST"),
            exp.CountIf: rename_func("countIf"),
            exp.CompressColumnConstraint: lambda self,
            e: f"CODEC({self.expressions(e, key='this', flat=True)})",
            exp.ComputedColumnConstraint: lambda self,
            e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
            exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
            exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
            exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
            exp.DateSub: _datetime_delta_sql("DATE_SUB"),
            exp.Explode: rename_func("arrayJoin"),
            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
            exp.IsNan: rename_func("isNaN"),
            exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONPathKey: json_path_key_only_name,
            exp.JSONPathRoot: lambda *_: "",
            exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Nullif: rename_func("nullIf"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
            exp.Rand: rename_func("randCanonical"),
            exp.Select: transforms.preprocess([transforms.eliminate_qualify]),
            exp.StartsWith: rename_func("startsWith"),
            exp.StrPosition: lambda self, e: self.func(
                "position", e.this, e.args.get("substr"), e.args.get("position")
            ),
            exp.TimeToStr: lambda self, e: self.func(
                "DATE_FORMAT", e.this, self.format_time(e), e.args.get("timezone")
            ),
            exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
            exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
            exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
            exp.MD5Digest: rename_func("MD5"),
            exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.UnixToTime: _unix_to_time_sql,
            exp.TimestampTrunc: timestamptrunc_sql(zone=True),
            exp.Variance: rename_func("varSamp"),
            exp.Stddev: rename_func("stddevSamp"),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.OnCluster: exp.Properties.Location.POST_NAME,
        }

        # there's no list in docs, but it can be found in Clickhouse code
        # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
        ON_CLUSTER_TARGETS = {
            "DATABASE",
            "TABLE",
            "VIEW",
            "DICTIONARY",
            "INDEX",
            "FUNCTION",
            "NAMED COLLECTION",
        }

        def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
            # ClickHouse JSON paths are 1-based, so integer subscripts are shifted.
            this = self.json_path_part(expression.this)
            return str(int(this) + 1) if is_int(this) else this

        def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
            return f"AS {self.sql(expression, 'this')}"

        def _any_to_has(
            self,
            expression: exp.EQ | exp.NEQ,
            default: t.Callable[[t.Any], str],
            prefix: str = "",
        ) -> str:
            """Rewrite `x = ANY(arr)` / `ANY(arr) = x` as has(arr, x); fall back
            to the default renderer when no ANY operand is present."""
            if isinstance(expression.left, exp.Any):
                arr = expression.left
                this = expression.right
            elif isinstance(expression.right, exp.Any):
                arr = expression.right
                this = expression.left
            else:
                return default(expression)

            return prefix + self.func("has", arr.this.unnest(), this)

        def eq_sql(self, expression: exp.EQ) -> str:
            return self._any_to_has(expression, super().eq_sql)

        def neq_sql(self, expression: exp.NEQ) -> str:
            return self._any_to_has(expression, super().neq_sql, "NOT ")

        def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
            # Manually add a flag to make the search case-insensitive
            regex = self.func("CONCAT", "'(?i)'", expression.expression)
            return self.func("match", expression.this, regex)

        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type, every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                return "String"

            return super().datatype_sql(expression)

        def cte_sql(self, expression: exp.CTE) -> str:
            # Scalar CTEs come from ClickHouse's `WITH <expr> AS <name>` form.
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            # SETTINGS and FORMAT trail the LIMIT clause in ClickHouse.
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]

        def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
            params = self.expressions(expression, key="params", flat=True)
            return self.func(expression.name, *expression.expressions) + f"({params})"

        def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
            return self.func(expression.name, *expression.expressions)

        def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
            return self.anonymousaggfunc_sql(expression)

        def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
            return self.parameterizedagg_sql(expression)

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

        def oncluster_sql(self, expression: exp.OnCluster) -> str:
            return f"ON CLUSTER {self.sql(expression, 'this')}"

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            # ON CLUSTER must appear between the object name and its schema.
            if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
                exp.Properties.Location.POST_NAME
            ):
                this_name = self.sql(expression.this, "this")
                this_properties = " ".join(
                    [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
                )
                this_schema = self.schema_columns_sql(expression.this)
                return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}"

            return super().createable_sql(expression, locations)

        def prewhere_sql(self, expression: exp.PreWhere) -> str:
            this = self.indent(self.sql(expression, "this"))
            return f"{self.seg('PREWHERE')}{self.sep()}{this}"

        def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
            this = self.sql(expression, "this")
            this = f" {this}" if this else ""
            expr = self.sql(expression, "expression")
            expr = f" {expr}" if expr else ""
            index_type = self.sql(expression, "index_type")
            index_type = f" TYPE {index_type}" if index_type else ""
            granularity = self.sql(expression, "granularity")
            granularity = f" GRANULARITY {granularity}" if granularity else ""

            return f"INDEX{this}{expr}{index_type}{granularity}"

        def partition_sql(self, expression: exp.Partition) -> str:
            return f"PARTITION {self.expressions(expression, flat=True)}"

        def partitionid_sql(self, expression: exp.PartitionId) -> str:
            return f"ID {self.sql(expression.this)}"

        def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
            return (
                f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
            )

        def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
            return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
100class ClickHouse(Dialect): 101 NORMALIZE_FUNCTIONS: bool | str = False 102 NULL_ORDERING = "nulls_are_last" 103 SUPPORTS_USER_DEFINED_TYPES = False 104 SAFE_DIVISION = True 105 LOG_BASE_FIRST: t.Optional[bool] = None 106 FORCE_EARLY_ALIAS_REF_EXPANSION = True 107 108 UNESCAPED_SEQUENCES = { 109 "\\0": "\0", 110 } 111 112 class Tokenizer(tokens.Tokenizer): 113 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 114 IDENTIFIERS = ['"', "`"] 115 STRING_ESCAPES = ["'", "\\"] 116 BIT_STRINGS = [("0b", "")] 117 HEX_STRINGS = [("0x", ""), ("0X", "")] 118 HEREDOC_STRINGS = ["$"] 119 120 KEYWORDS = { 121 **tokens.Tokenizer.KEYWORDS, 122 "ATTACH": TokenType.COMMAND, 123 "DATE32": TokenType.DATE32, 124 "DATETIME64": TokenType.DATETIME64, 125 "DICTIONARY": TokenType.DICTIONARY, 126 "ENUM8": TokenType.ENUM8, 127 "ENUM16": TokenType.ENUM16, 128 "FINAL": TokenType.FINAL, 129 "FIXEDSTRING": TokenType.FIXEDSTRING, 130 "FLOAT32": TokenType.FLOAT, 131 "FLOAT64": TokenType.DOUBLE, 132 "GLOBAL": TokenType.GLOBAL, 133 "INT256": TokenType.INT256, 134 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 135 "MAP": TokenType.MAP, 136 "NESTED": TokenType.NESTED, 137 "SAMPLE": TokenType.TABLE_SAMPLE, 138 "TUPLE": TokenType.STRUCT, 139 "UINT128": TokenType.UINT128, 140 "UINT16": TokenType.USMALLINT, 141 "UINT256": TokenType.UINT256, 142 "UINT32": TokenType.UINT, 143 "UINT64": TokenType.UBIGINT, 144 "UINT8": TokenType.UTINYINT, 145 "IPV4": TokenType.IPV4, 146 "IPV6": TokenType.IPV6, 147 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 148 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 149 "SYSTEM": TokenType.COMMAND, 150 "PREWHERE": TokenType.PREWHERE, 151 } 152 KEYWORDS.pop("/*+") 153 154 SINGLE_TOKENS = { 155 **tokens.Tokenizer.SINGLE_TOKENS, 156 "$": TokenType.HEREDOC_STRING, 157 } 158 159 class Parser(parser.Parser): 160 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 161 # * select x from t1 union all select x from t2 limit 1; 162 # * select 
x from t1 union all (select x from t2 limit 1); 163 MODIFIERS_ATTACHED_TO_SET_OP = False 164 INTERVAL_SPANS = False 165 166 FUNCTIONS = { 167 **parser.Parser.FUNCTIONS, 168 "ANY": exp.AnyValue.from_arg_list, 169 "ARRAYSUM": exp.ArraySum.from_arg_list, 170 "COUNTIF": _build_count_if, 171 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 172 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 173 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 174 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 175 "DATE_FORMAT": _build_date_format, 176 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 177 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 178 "EXTRACT": exp.RegexpExtract.from_arg_list, 179 "FORMATDATETIME": _build_date_format, 180 "JSONEXTRACTSTRING": build_json_extract_path( 181 exp.JSONExtractScalar, zero_based_indexing=False 182 ), 183 "MAP": parser.build_var_map, 184 "MATCH": exp.RegexpLike.from_arg_list, 185 "RANDCANONICAL": exp.Rand.from_arg_list, 186 "TUPLE": exp.Struct.from_arg_list, 187 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 188 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 189 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 190 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 191 "UNIQ": exp.ApproxDistinct.from_arg_list, 192 "XOR": lambda args: exp.Xor(expressions=args), 193 "MD5": exp.MD5Digest.from_arg_list, 194 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 195 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 196 } 197 198 AGG_FUNCTIONS = { 199 "count", 200 "min", 201 "max", 202 "sum", 203 "avg", 204 "any", 205 "stddevPop", 206 "stddevSamp", 207 "varPop", 208 "varSamp", 209 "corr", 210 "covarPop", 211 "covarSamp", 212 "entropy", 213 "exponentialMovingAverage", 214 "intervalLengthSum", 215 
"kolmogorovSmirnovTest", 216 "mannWhitneyUTest", 217 "median", 218 "rankCorr", 219 "sumKahan", 220 "studentTTest", 221 "welchTTest", 222 "anyHeavy", 223 "anyLast", 224 "boundingRatio", 225 "first_value", 226 "last_value", 227 "argMin", 228 "argMax", 229 "avgWeighted", 230 "topK", 231 "topKWeighted", 232 "deltaSum", 233 "deltaSumTimestamp", 234 "groupArray", 235 "groupArrayLast", 236 "groupUniqArray", 237 "groupArrayInsertAt", 238 "groupArrayMovingAvg", 239 "groupArrayMovingSum", 240 "groupArraySample", 241 "groupBitAnd", 242 "groupBitOr", 243 "groupBitXor", 244 "groupBitmap", 245 "groupBitmapAnd", 246 "groupBitmapOr", 247 "groupBitmapXor", 248 "sumWithOverflow", 249 "sumMap", 250 "minMap", 251 "maxMap", 252 "skewSamp", 253 "skewPop", 254 "kurtSamp", 255 "kurtPop", 256 "uniq", 257 "uniqExact", 258 "uniqCombined", 259 "uniqCombined64", 260 "uniqHLL12", 261 "uniqTheta", 262 "quantile", 263 "quantiles", 264 "quantileExact", 265 "quantilesExact", 266 "quantileExactLow", 267 "quantilesExactLow", 268 "quantileExactHigh", 269 "quantilesExactHigh", 270 "quantileExactWeighted", 271 "quantilesExactWeighted", 272 "quantileTiming", 273 "quantilesTiming", 274 "quantileTimingWeighted", 275 "quantilesTimingWeighted", 276 "quantileDeterministic", 277 "quantilesDeterministic", 278 "quantileTDigest", 279 "quantilesTDigest", 280 "quantileTDigestWeighted", 281 "quantilesTDigestWeighted", 282 "quantileBFloat16", 283 "quantilesBFloat16", 284 "quantileBFloat16Weighted", 285 "quantilesBFloat16Weighted", 286 "simpleLinearRegression", 287 "stochasticLinearRegression", 288 "stochasticLogisticRegression", 289 "categoricalInformationValue", 290 "contingency", 291 "cramersV", 292 "cramersVBiasCorrected", 293 "theilsU", 294 "maxIntersections", 295 "maxIntersectionsPosition", 296 "meanZTest", 297 "quantileInterpolatedWeighted", 298 "quantilesInterpolatedWeighted", 299 "quantileGK", 300 "quantilesGK", 301 "sparkBar", 302 "sumCount", 303 "largestTriangleThreeBuckets", 304 "histogram", 305 
"sequenceMatch", 306 "sequenceCount", 307 "windowFunnel", 308 "retention", 309 "uniqUpTo", 310 "sequenceNextNode", 311 "exponentialTimeDecayedAvg", 312 } 313 314 AGG_FUNCTIONS_SUFFIXES = [ 315 "If", 316 "Array", 317 "ArrayIf", 318 "Map", 319 "SimpleState", 320 "State", 321 "Merge", 322 "MergeState", 323 "ForEach", 324 "Distinct", 325 "OrDefault", 326 "OrNull", 327 "Resample", 328 "ArgMin", 329 "ArgMax", 330 ] 331 332 FUNC_TOKENS = { 333 *parser.Parser.FUNC_TOKENS, 334 TokenType.SET, 335 } 336 337 AGG_FUNC_MAPPING = ( 338 lambda functions, suffixes: { 339 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 340 } 341 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 342 343 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 344 345 FUNCTION_PARSERS = { 346 **parser.Parser.FUNCTION_PARSERS, 347 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 348 "QUANTILE": lambda self: self._parse_quantile(), 349 } 350 351 FUNCTION_PARSERS.pop("EXTRACT") 352 FUNCTION_PARSERS.pop("MATCH") 353 354 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 355 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 356 357 RANGE_PARSERS = { 358 **parser.Parser.RANGE_PARSERS, 359 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 360 and self._parse_in(this, is_global=True), 361 } 362 363 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 364 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 
365 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 366 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 367 368 JOIN_KINDS = { 369 *parser.Parser.JOIN_KINDS, 370 TokenType.ANY, 371 TokenType.ASOF, 372 TokenType.ARRAY, 373 } 374 375 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 376 TokenType.ANY, 377 TokenType.ARRAY, 378 TokenType.FINAL, 379 TokenType.FORMAT, 380 TokenType.SETTINGS, 381 } 382 383 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 384 TokenType.FORMAT, 385 } 386 387 LOG_DEFAULTS_TO_LN = True 388 389 QUERY_MODIFIER_PARSERS = { 390 **parser.Parser.QUERY_MODIFIER_PARSERS, 391 TokenType.SETTINGS: lambda self: ( 392 "settings", 393 self._advance() or self._parse_csv(self._parse_assignment), 394 ), 395 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 396 } 397 398 CONSTRAINT_PARSERS = { 399 **parser.Parser.CONSTRAINT_PARSERS, 400 "INDEX": lambda self: self._parse_index_constraint(), 401 "CODEC": lambda self: self._parse_compress(), 402 } 403 404 ALTER_PARSERS = { 405 **parser.Parser.ALTER_PARSERS, 406 "REPLACE": lambda self: self._parse_alter_table_replace(), 407 } 408 409 SCHEMA_UNNAMED_CONSTRAINTS = { 410 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 411 "INDEX", 412 } 413 414 def _parse_assignment(self) -> t.Optional[exp.Expression]: 415 this = super()._parse_assignment() 416 417 if self._match(TokenType.PLACEHOLDER): 418 return self.expression( 419 exp.If, 420 this=this, 421 true=self._parse_assignment(), 422 false=self._match(TokenType.COLON) and self._parse_assignment(), 423 ) 424 425 return this 426 427 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 428 """ 429 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 430 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 431 """ 432 if not self._match(TokenType.L_BRACE): 433 return None 434 435 this = self._parse_id_var() 436 self._match(TokenType.COLON) 437 kind = 
self._parse_types(check_func=False, allow_identifiers=False) or ( 438 self._match_text_seq("IDENTIFIER") and "Identifier" 439 ) 440 441 if not kind: 442 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 443 elif not self._match(TokenType.R_BRACE): 444 self.raise_error("Expecting }") 445 446 return self.expression(exp.Placeholder, this=this, kind=kind) 447 448 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 449 this = super()._parse_in(this) 450 this.set("is_global", is_global) 451 return this 452 453 def _parse_table( 454 self, 455 schema: bool = False, 456 joins: bool = False, 457 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 458 parse_bracket: bool = False, 459 is_db_reference: bool = False, 460 parse_partition: bool = False, 461 ) -> t.Optional[exp.Expression]: 462 this = super()._parse_table( 463 schema=schema, 464 joins=joins, 465 alias_tokens=alias_tokens, 466 parse_bracket=parse_bracket, 467 is_db_reference=is_db_reference, 468 ) 469 470 if self._match(TokenType.FINAL): 471 this = self.expression(exp.Final, this=this) 472 473 return this 474 475 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 476 return super()._parse_position(haystack_first=True) 477 478 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 479 def _parse_cte(self) -> exp.CTE: 480 # WITH <identifier> AS <subquery expression> 481 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 482 483 if not cte: 484 # WITH <expression> AS <identifier> 485 cte = self.expression( 486 exp.CTE, 487 this=self._parse_assignment(), 488 alias=self._parse_table_alias(), 489 scalar=True, 490 ) 491 492 return cte 493 494 def _parse_join_parts( 495 self, 496 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 497 is_global = self._match(TokenType.GLOBAL) and self._prev 498 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 499 500 if 
kind_pre: 501 kind = self._match_set(self.JOIN_KINDS) and self._prev 502 side = self._match_set(self.JOIN_SIDES) and self._prev 503 return is_global, side, kind 504 505 return ( 506 is_global, 507 self._match_set(self.JOIN_SIDES) and self._prev, 508 self._match_set(self.JOIN_KINDS) and self._prev, 509 ) 510 511 def _parse_join( 512 self, skip_join_token: bool = False, parse_bracket: bool = False 513 ) -> t.Optional[exp.Join]: 514 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 515 if join: 516 join.set("global", join.args.pop("method", None)) 517 518 return join 519 520 def _parse_function( 521 self, 522 functions: t.Optional[t.Dict[str, t.Callable]] = None, 523 anonymous: bool = False, 524 optional_parens: bool = True, 525 any_token: bool = False, 526 ) -> t.Optional[exp.Expression]: 527 expr = super()._parse_function( 528 functions=functions, 529 anonymous=anonymous, 530 optional_parens=optional_parens, 531 any_token=any_token, 532 ) 533 534 func = expr.this if isinstance(expr, exp.Window) else expr 535 536 # Aggregate functions can be split in 2 parts: <func_name><suffix> 537 parts = ( 538 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 539 ) 540 541 if parts: 542 params = self._parse_func_params(func) 543 544 kwargs = { 545 "this": func.this, 546 "expressions": func.expressions, 547 } 548 if parts[1]: 549 kwargs["parts"] = parts 550 exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 551 else: 552 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 553 554 kwargs["exp_class"] = exp_class 555 if params: 556 kwargs["params"] = params 557 558 func = self.expression(**kwargs) 559 560 if isinstance(expr, exp.Window): 561 # The window's func was parsed as Anonymous in base parser, fix its 562 # type to be CH style CombinedAnonymousAggFunc / AnonymousAggFunc 563 expr.set("this", func) 564 elif params: 565 # Params have blocked super()._parse_function() from 
parsing the following window 566 # (if that exists) as they're standing between the function call and the window spec 567 expr = self._parse_window(func) 568 else: 569 expr = func 570 571 return expr 572 573 def _parse_func_params( 574 self, this: t.Optional[exp.Func] = None 575 ) -> t.Optional[t.List[exp.Expression]]: 576 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 577 return self._parse_csv(self._parse_lambda) 578 579 if self._match(TokenType.L_PAREN): 580 params = self._parse_csv(self._parse_lambda) 581 self._match_r_paren(this) 582 return params 583 584 return None 585 586 def _parse_quantile(self) -> exp.Quantile: 587 this = self._parse_lambda() 588 params = self._parse_func_params() 589 if params: 590 return self.expression(exp.Quantile, this=params[0], quantile=this) 591 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 592 593 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 594 return super()._parse_wrapped_id_vars(optional=True) 595 596 def _parse_primary_key( 597 self, wrapped_optional: bool = False, in_props: bool = False 598 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 599 return super()._parse_primary_key( 600 wrapped_optional=wrapped_optional or in_props, in_props=in_props 601 ) 602 603 def _parse_on_property(self) -> t.Optional[exp.Expression]: 604 index = self._index 605 if self._match_text_seq("CLUSTER"): 606 this = self._parse_id_var() 607 if this: 608 return self.expression(exp.OnCluster, this=this) 609 else: 610 self._retreat(index) 611 return None 612 613 def _parse_index_constraint( 614 self, kind: t.Optional[str] = None 615 ) -> exp.IndexColumnConstraint: 616 # INDEX name1 expr TYPE type1(args) GRANULARITY value 617 this = self._parse_id_var() 618 expression = self._parse_assignment() 619 620 index_type = self._match_text_seq("TYPE") and ( 621 self._parse_function() or self._parse_var() 622 ) 623 624 granularity = self._match_text_seq("GRANULARITY") 
and self._parse_term() 625 626 return self.expression( 627 exp.IndexColumnConstraint, 628 this=this, 629 expression=expression, 630 index_type=index_type, 631 granularity=granularity, 632 ) 633 634 def _parse_partition(self) -> t.Optional[exp.Partition]: 635 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 636 if not self._match(TokenType.PARTITION): 637 return None 638 639 if self._match_text_seq("ID"): 640 # Corresponds to the PARTITION ID <string_value> syntax 641 expressions: t.List[exp.Expression] = [ 642 self.expression(exp.PartitionId, this=self._parse_string()) 643 ] 644 else: 645 expressions = self._parse_expressions() 646 647 return self.expression(exp.Partition, expressions=expressions) 648 649 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 650 partition = self._parse_partition() 651 652 if not partition or not self._match(TokenType.FROM): 653 return None 654 655 return self.expression( 656 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 657 ) 658 659 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 660 if not self._match_text_seq("PROJECTION"): 661 return None 662 663 return self.expression( 664 exp.ProjectionDef, 665 this=self._parse_id_var(), 666 expression=self._parse_wrapped(self._parse_statement), 667 ) 668 669 def _parse_constraint(self) -> t.Optional[exp.Expression]: 670 return super()._parse_constraint() or self._parse_projection_def() 671 672 class Generator(generator.Generator): 673 QUERY_HINTS = False 674 STRUCT_DELIMITER = ("(", ")") 675 NVL2_SUPPORTED = False 676 TABLESAMPLE_REQUIRES_PARENS = False 677 TABLESAMPLE_SIZE_IS_ROWS = False 678 TABLESAMPLE_KEYWORDS = "SAMPLE" 679 LAST_DAY_SUPPORTS_DATE_PART = False 680 CAN_IMPLEMENT_ARRAY_ANY = True 681 SUPPORTS_TO_NUMBER = False 682 JOIN_HINTS = False 683 TABLE_HINTS = False 684 EXPLICIT_SET_OP = True 685 GROUPINGS_SEP = "" 686 SET_OP_MODIFIERS = False 687 688 
STRING_TYPE_MAPPING = { 689 exp.DataType.Type.CHAR: "String", 690 exp.DataType.Type.LONGBLOB: "String", 691 exp.DataType.Type.LONGTEXT: "String", 692 exp.DataType.Type.MEDIUMBLOB: "String", 693 exp.DataType.Type.MEDIUMTEXT: "String", 694 exp.DataType.Type.TINYBLOB: "String", 695 exp.DataType.Type.TINYTEXT: "String", 696 exp.DataType.Type.TEXT: "String", 697 exp.DataType.Type.VARBINARY: "String", 698 exp.DataType.Type.VARCHAR: "String", 699 } 700 701 SUPPORTED_JSON_PATH_PARTS = { 702 exp.JSONPathKey, 703 exp.JSONPathRoot, 704 exp.JSONPathSubscript, 705 } 706 707 TYPE_MAPPING = { 708 **generator.Generator.TYPE_MAPPING, 709 **STRING_TYPE_MAPPING, 710 exp.DataType.Type.ARRAY: "Array", 711 exp.DataType.Type.BIGINT: "Int64", 712 exp.DataType.Type.DATE32: "Date32", 713 exp.DataType.Type.DATETIME64: "DateTime64", 714 exp.DataType.Type.DOUBLE: "Float64", 715 exp.DataType.Type.ENUM: "Enum", 716 exp.DataType.Type.ENUM8: "Enum8", 717 exp.DataType.Type.ENUM16: "Enum16", 718 exp.DataType.Type.FIXEDSTRING: "FixedString", 719 exp.DataType.Type.FLOAT: "Float32", 720 exp.DataType.Type.INT: "Int32", 721 exp.DataType.Type.MEDIUMINT: "Int32", 722 exp.DataType.Type.INT128: "Int128", 723 exp.DataType.Type.INT256: "Int256", 724 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 725 exp.DataType.Type.MAP: "Map", 726 exp.DataType.Type.NESTED: "Nested", 727 exp.DataType.Type.NULLABLE: "Nullable", 728 exp.DataType.Type.SMALLINT: "Int16", 729 exp.DataType.Type.STRUCT: "Tuple", 730 exp.DataType.Type.TINYINT: "Int8", 731 exp.DataType.Type.UBIGINT: "UInt64", 732 exp.DataType.Type.UINT: "UInt32", 733 exp.DataType.Type.UINT128: "UInt128", 734 exp.DataType.Type.UINT256: "UInt256", 735 exp.DataType.Type.USMALLINT: "UInt16", 736 exp.DataType.Type.UTINYINT: "UInt8", 737 exp.DataType.Type.IPV4: "IPv4", 738 exp.DataType.Type.IPV6: "IPv6", 739 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 740 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 741 } 742 743 TRANSFORMS = { 
744 **generator.Generator.TRANSFORMS, 745 exp.AnyValue: rename_func("any"), 746 exp.ApproxDistinct: rename_func("uniq"), 747 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 748 exp.ArraySize: rename_func("LENGTH"), 749 exp.ArraySum: rename_func("arraySum"), 750 exp.ArgMax: arg_max_or_min_no_count("argMax"), 751 exp.ArgMin: arg_max_or_min_no_count("argMin"), 752 exp.Array: inline_array_sql, 753 exp.CastToStrType: rename_func("CAST"), 754 exp.CountIf: rename_func("countIf"), 755 exp.CompressColumnConstraint: lambda self, 756 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 757 exp.ComputedColumnConstraint: lambda self, 758 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 759 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 760 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 761 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 762 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 763 exp.Explode: rename_func("arrayJoin"), 764 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 765 exp.IsNan: rename_func("isNaN"), 766 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 767 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 768 exp.JSONPathKey: json_path_key_only_name, 769 exp.JSONPathRoot: lambda *_: "", 770 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 771 exp.Nullif: rename_func("nullIf"), 772 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 773 exp.Pivot: no_pivot_sql, 774 exp.Quantile: _quantile_sql, 775 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 776 exp.Rand: rename_func("randCanonical"), 777 exp.Select: transforms.preprocess([transforms.eliminate_qualify]), 778 exp.StartsWith: rename_func("startsWith"), 779 exp.StrPosition: lambda self, e: self.func( 780 "position", e.this, e.args.get("substr"), e.args.get("position") 781 ), 782 
exp.TimeToStr: lambda self, e: self.func( 783 "DATE_FORMAT", e.this, self.format_time(e), e.args.get("timezone") 784 ), 785 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 786 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 787 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 788 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 789 exp.MD5Digest: rename_func("MD5"), 790 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 791 exp.SHA: rename_func("SHA1"), 792 exp.SHA2: sha256_sql, 793 exp.UnixToTime: _unix_to_time_sql, 794 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 795 exp.Variance: rename_func("varSamp"), 796 exp.Stddev: rename_func("stddevSamp"), 797 } 798 799 PROPERTIES_LOCATION = { 800 **generator.Generator.PROPERTIES_LOCATION, 801 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 802 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 803 exp.OnCluster: exp.Properties.Location.POST_NAME, 804 } 805 806 # there's no list in docs, but it can be found in Clickhouse code 807 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 808 ON_CLUSTER_TARGETS = { 809 "DATABASE", 810 "TABLE", 811 "VIEW", 812 "DICTIONARY", 813 "INDEX", 814 "FUNCTION", 815 "NAMED COLLECTION", 816 } 817 818 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 819 this = self.json_path_part(expression.this) 820 return str(int(this) + 1) if is_int(this) else this 821 822 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 823 return f"AS {self.sql(expression, 'this')}" 824 825 def _any_to_has( 826 self, 827 expression: exp.EQ | exp.NEQ, 828 default: t.Callable[[t.Any], str], 829 prefix: str = "", 830 ) -> str: 831 if isinstance(expression.left, exp.Any): 832 arr = expression.left 833 this = expression.right 834 elif isinstance(expression.right, exp.Any): 835 arr = expression.right 836 this = expression.left 837 else: 838 return 
default(expression) 839 840 return prefix + self.func("has", arr.this.unnest(), this) 841 842 def eq_sql(self, expression: exp.EQ) -> str: 843 return self._any_to_has(expression, super().eq_sql) 844 845 def neq_sql(self, expression: exp.NEQ) -> str: 846 return self._any_to_has(expression, super().neq_sql, "NOT ") 847 848 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 849 # Manually add a flag to make the search case-insensitive 850 regex = self.func("CONCAT", "'(?i)'", expression.expression) 851 return self.func("match", expression.this, regex) 852 853 def datatype_sql(self, expression: exp.DataType) -> str: 854 # String is the standard ClickHouse type, every other variant is just an alias. 855 # Additionally, any supplied length parameter will be ignored. 856 # 857 # https://clickhouse.com/docs/en/sql-reference/data-types/string 858 if expression.this in self.STRING_TYPE_MAPPING: 859 return "String" 860 861 return super().datatype_sql(expression) 862 863 def cte_sql(self, expression: exp.CTE) -> str: 864 if expression.args.get("scalar"): 865 this = self.sql(expression, "this") 866 alias = self.sql(expression, "alias") 867 return f"{this} AS {alias}" 868 869 return super().cte_sql(expression) 870 871 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 872 return super().after_limit_modifiers(expression) + [ 873 ( 874 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 875 if expression.args.get("settings") 876 else "" 877 ), 878 ( 879 self.seg("FORMAT ") + self.sql(expression, "format") 880 if expression.args.get("format") 881 else "" 882 ), 883 ] 884 885 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 886 params = self.expressions(expression, key="params", flat=True) 887 return self.func(expression.name, *expression.expressions) + f"({params})" 888 889 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 890 return self.func(expression.name, 
*expression.expressions) 891 892 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 893 return self.anonymousaggfunc_sql(expression) 894 895 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 896 return self.parameterizedagg_sql(expression) 897 898 def placeholder_sql(self, expression: exp.Placeholder) -> str: 899 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 900 901 def oncluster_sql(self, expression: exp.OnCluster) -> str: 902 return f"ON CLUSTER {self.sql(expression, 'this')}" 903 904 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 905 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 906 exp.Properties.Location.POST_NAME 907 ): 908 this_name = self.sql(expression.this, "this") 909 this_properties = " ".join( 910 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 911 ) 912 this_schema = self.schema_columns_sql(expression.this) 913 return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}" 914 915 return super().createable_sql(expression, locations) 916 917 def prewhere_sql(self, expression: exp.PreWhere) -> str: 918 this = self.indent(self.sql(expression, "this")) 919 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 920 921 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 922 this = self.sql(expression, "this") 923 this = f" {this}" if this else "" 924 expr = self.sql(expression, "expression") 925 expr = f" {expr}" if expr else "" 926 index_type = self.sql(expression, "index_type") 927 index_type = f" TYPE {index_type}" if index_type else "" 928 granularity = self.sql(expression, "granularity") 929 granularity = f" GRANULARITY {granularity}" if granularity else "" 930 931 return f"INDEX{this}{expr}{index_type}{granularity}" 932 933 def partition_sql(self, expression: exp.Partition) -> str: 934 return f"PARTITION {self.expressions(expression, flat=True)}" 935 936 
def partitionid_sql(self, expression: exp.PartitionId) -> str: 937 return f"ID {self.sql(expression.this)}" 938 939 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 940 return ( 941 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 942 ) 943 944 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 945 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG).
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS (SELECT 1 AS id, 2 AS my_id) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
In most dialects "my_id" would refer to "data.my_id" (which is done in _qualify_columns()) across the query, except: - BigQuery, which will forward the alias to GROUP BY + HAVING clauses i.e it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1" - Clickhouse, which will forward the alias across the query i.e it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
Mapping of an escaped sequence (e.g. "\\0") to its unescaped version (e.g. the NUL character, per the UNESCAPED_SEQUENCES shown above).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- NORMALIZATION_STRATEGY
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- TYPED_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- DATE_PART_MAPPING
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
112 class Tokenizer(tokens.Tokenizer): 113 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 114 IDENTIFIERS = ['"', "`"] 115 STRING_ESCAPES = ["'", "\\"] 116 BIT_STRINGS = [("0b", "")] 117 HEX_STRINGS = [("0x", ""), ("0X", "")] 118 HEREDOC_STRINGS = ["$"] 119 120 KEYWORDS = { 121 **tokens.Tokenizer.KEYWORDS, 122 "ATTACH": TokenType.COMMAND, 123 "DATE32": TokenType.DATE32, 124 "DATETIME64": TokenType.DATETIME64, 125 "DICTIONARY": TokenType.DICTIONARY, 126 "ENUM8": TokenType.ENUM8, 127 "ENUM16": TokenType.ENUM16, 128 "FINAL": TokenType.FINAL, 129 "FIXEDSTRING": TokenType.FIXEDSTRING, 130 "FLOAT32": TokenType.FLOAT, 131 "FLOAT64": TokenType.DOUBLE, 132 "GLOBAL": TokenType.GLOBAL, 133 "INT256": TokenType.INT256, 134 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 135 "MAP": TokenType.MAP, 136 "NESTED": TokenType.NESTED, 137 "SAMPLE": TokenType.TABLE_SAMPLE, 138 "TUPLE": TokenType.STRUCT, 139 "UINT128": TokenType.UINT128, 140 "UINT16": TokenType.USMALLINT, 141 "UINT256": TokenType.UINT256, 142 "UINT32": TokenType.UINT, 143 "UINT64": TokenType.UBIGINT, 144 "UINT8": TokenType.UTINYINT, 145 "IPV4": TokenType.IPV4, 146 "IPV6": TokenType.IPV6, 147 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 148 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 149 "SYSTEM": TokenType.COMMAND, 150 "PREWHERE": TokenType.PREWHERE, 151 } 152 KEYWORDS.pop("/*+") 153 154 SINGLE_TOKENS = { 155 **tokens.Tokenizer.SINGLE_TOKENS, 156 "$": TokenType.HEREDOC_STRING, 157 }
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    """ClickHouse-specific parser overrides.

    Extends the base parser with ClickHouse syntax visible below:
    GLOBAL IN, the FINAL table modifier, parameterized/combined aggregate
    functions (e.g. ``quantile(0.5)(x)``, ``sumIf``), query parameters
    (``{name: Type}``), SETTINGS/FORMAT query modifiers, the ``?:`` ternary
    operator, scalar CTEs (``WITH <expr> AS <identifier>``), INDEX/CODEC
    column constraints, PARTITION expressions and PROJECTION definitions.
    """

    # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
    # * select x from t1 union all select x from t2 limit 1;
    # * select x from t1 union all (select x from t2 limit 1);
    MODIFIERS_ATTACHED_TO_SET_OP = False
    INTERVAL_SPANS = False

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ANY": exp.AnyValue.from_arg_list,
        "ARRAYSUM": exp.ArraySum.from_arg_list,
        "COUNTIF": _build_count_if,
        "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
        "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
        "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None),
        "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None),
        "DATE_FORMAT": _build_date_format,
        "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
        "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
        # In ClickHouse, EXTRACT(haystack, pattern) is a regex extractor.
        "EXTRACT": exp.RegexpExtract.from_arg_list,
        "FORMATDATETIME": _build_date_format,
        "JSONEXTRACTSTRING": build_json_extract_path(
            exp.JSONExtractScalar, zero_based_indexing=False
        ),
        "MAP": parser.build_var_map,
        "MATCH": exp.RegexpLike.from_arg_list,
        "RANDCANONICAL": exp.Rand.from_arg_list,
        "TUPLE": exp.Struct.from_arg_list,
        "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
        "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
        "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
        "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
        "UNIQ": exp.ApproxDistinct.from_arg_list,
        "XOR": lambda args: exp.Xor(expressions=args),
        "MD5": exp.MD5Digest.from_arg_list,
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
    }

    # Base names of ClickHouse aggregate functions; combined with
    # AGG_FUNCTIONS_SUFFIXES below to build AGG_FUNC_MAPPING.
    AGG_FUNCTIONS = {
        "count",
        "min",
        "max",
        "sum",
        "avg",
        "any",
        "stddevPop",
        "stddevSamp",
        "varPop",
        "varSamp",
        "corr",
        "covarPop",
        "covarSamp",
        "entropy",
        "exponentialMovingAverage",
        "intervalLengthSum",
        "kolmogorovSmirnovTest",
        "mannWhitneyUTest",
        "median",
        "rankCorr",
        "sumKahan",
        "studentTTest",
        "welchTTest",
        "anyHeavy",
        "anyLast",
        "boundingRatio",
        "first_value",
        "last_value",
        "argMin",
        "argMax",
        "avgWeighted",
        "topK",
        "topKWeighted",
        "deltaSum",
        "deltaSumTimestamp",
        "groupArray",
        "groupArrayLast",
        "groupUniqArray",
        "groupArrayInsertAt",
        "groupArrayMovingAvg",
        "groupArrayMovingSum",
        "groupArraySample",
        "groupBitAnd",
        "groupBitOr",
        "groupBitXor",
        "groupBitmap",
        "groupBitmapAnd",
        "groupBitmapOr",
        "groupBitmapXor",
        "sumWithOverflow",
        "sumMap",
        "minMap",
        "maxMap",
        "skewSamp",
        "skewPop",
        "kurtSamp",
        "kurtPop",
        "uniq",
        "uniqExact",
        "uniqCombined",
        "uniqCombined64",
        "uniqHLL12",
        "uniqTheta",
        "quantile",
        "quantiles",
        "quantileExact",
        "quantilesExact",
        "quantileExactLow",
        "quantilesExactLow",
        "quantileExactHigh",
        "quantilesExactHigh",
        "quantileExactWeighted",
        "quantilesExactWeighted",
        "quantileTiming",
        "quantilesTiming",
        "quantileTimingWeighted",
        "quantilesTimingWeighted",
        "quantileDeterministic",
        "quantilesDeterministic",
        "quantileTDigest",
        "quantilesTDigest",
        "quantileTDigestWeighted",
        "quantilesTDigestWeighted",
        "quantileBFloat16",
        "quantilesBFloat16",
        "quantileBFloat16Weighted",
        "quantilesBFloat16Weighted",
        "simpleLinearRegression",
        "stochasticLinearRegression",
        "stochasticLogisticRegression",
        "categoricalInformationValue",
        "contingency",
        "cramersV",
        "cramersVBiasCorrected",
        "theilsU",
        "maxIntersections",
        "maxIntersectionsPosition",
        "meanZTest",
        "quantileInterpolatedWeighted",
        "quantilesInterpolatedWeighted",
        "quantileGK",
        "quantilesGK",
        "sparkBar",
        "sumCount",
        "largestTriangleThreeBuckets",
        "histogram",
        "sequenceMatch",
        "sequenceCount",
        "windowFunnel",
        "retention",
        "uniqUpTo",
        "sequenceNextNode",
        "exponentialTimeDecayedAvg",
    }

    # ClickHouse aggregate-function combinators (sumIf, countArray, ...).
    AGG_FUNCTIONS_SUFFIXES = [
        "If",
        "Array",
        "ArrayIf",
        "Map",
        "SimpleState",
        "State",
        "Merge",
        "MergeState",
        "ForEach",
        "Distinct",
        "OrDefault",
        "OrNull",
        "Resample",
        "ArgMin",
        "ArgMax",
    ]

    FUNC_TOKENS = {
        *parser.Parser.FUNC_TOKENS,
        TokenType.SET,
    }

    # Maps every "<base><suffix>" combination (and the bare base name, via
    # the "" suffix) to its (base, suffix) pair, e.g. "sumIf" -> ("sum", "If").
    AGG_FUNC_MAPPING = (
        lambda functions, suffixes: {
            f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
        }
    )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)

    FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
        "QUANTILE": lambda self: self._parse_quantile(),
    }

    # EXTRACT and MATCH are plain functions in ClickHouse (see FUNCTIONS above),
    # so the special-cased base parsers must not kick in.
    FUNCTION_PARSERS.pop("EXTRACT")
    FUNCTION_PARSERS.pop("MATCH")

    NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
    NO_PAREN_FUNCTION_PARSERS.pop("ANY")

    RANGE_PARSERS = {
        **parser.Parser.RANGE_PARSERS,
        TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
        and self._parse_in(this, is_global=True),
    }

    # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to
    # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
    COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
    COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

    JOIN_KINDS = {
        *parser.Parser.JOIN_KINDS,
        TokenType.ANY,
        TokenType.ASOF,
        TokenType.ARRAY,
    }

    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
        TokenType.ANY,
        TokenType.ARRAY,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.SETTINGS,
    }

    ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
        TokenType.FORMAT,
    }

    LOG_DEFAULTS_TO_LN = True

    QUERY_MODIFIER_PARSERS = {
        **parser.Parser.QUERY_MODIFIER_PARSERS,
        TokenType.SETTINGS: lambda self: (
            "settings",
            self._advance() or self._parse_csv(self._parse_assignment),
        ),
        TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "INDEX": lambda self: self._parse_index_constraint(),
        "CODEC": lambda self: self._parse_compress(),
    }

    ALTER_PARSERS = {
        **parser.Parser.ALTER_PARSERS,
        "REPLACE": lambda self: self._parse_alter_table_replace(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
        "INDEX",
    }

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        """Parse an assignment, additionally handling the ClickHouse
        ternary operator ``cond ? true_val : false_val`` as an exp.If."""
        this = super()._parse_assignment()

        if self._match(TokenType.PLACEHOLDER):
            return self.expression(
                exp.If,
                this=this,
                true=self._parse_assignment(),
                false=self._match(TokenType.COLON) and self._parse_assignment(),
            )

        return this

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """
        Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
        https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
        """
        if not self._match(TokenType.L_BRACE):
            return None

        this = self._parse_id_var()
        self._match(TokenType.COLON)
        kind = self._parse_types(check_func=False, allow_identifiers=False) or (
            self._match_text_seq("IDENTIFIER") and "Identifier"
        )

        if not kind:
            self.raise_error("Expecting a placeholder type or 'Identifier' for tables")
        elif not self._match(TokenType.R_BRACE):
            self.raise_error("Expecting }")

        return self.expression(exp.Placeholder, this=this, kind=kind)

    def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
        """Parse IN, tagging it when preceded by GLOBAL (see RANGE_PARSERS)."""
        this = super()._parse_in(this)
        this.set("is_global", is_global)
        return this

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a table reference, wrapping it in exp.Final when followed
        by ClickHouse's FINAL modifier."""
        this = super()._parse_table(
            schema=schema,
            joins=joins,
            alias_tokens=alias_tokens,
            parse_bracket=parse_bracket,
            is_db_reference=is_db_reference,
        )

        if self._match(TokenType.FINAL):
            this = self.expression(exp.Final, this=this)

        return this

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        # ClickHouse's position() always takes the haystack as the first argument.
        return super()._parse_position(haystack_first=True)

    # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
    def _parse_cte(self) -> exp.CTE:
        # WITH <identifier> AS <subquery expression>
        cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

        if not cte:
            # WITH <expression> AS <identifier>
            cte = self.expression(
                exp.CTE,
                this=self._parse_assignment(),
                alias=self._parse_table_alias(),
                scalar=True,
            )

        return cte

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Parse (method, side, kind) join tokens; ClickHouse allows GLOBAL
        as a join method and the kind to appear before the side."""
        is_global = self._match(TokenType.GLOBAL) and self._prev
        kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

        if kind_pre:
            kind = self._match_set(self.JOIN_KINDS) and self._prev
            side = self._match_set(self.JOIN_SIDES) and self._prev
            return is_global, side, kind

        return (
            is_global,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse a join, moving the GLOBAL method token (from
        _parse_join_parts) into the join's "global" arg."""
        join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
        if join:
            join.set("global", join.args.pop("method", None))

        return join

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, then post-process combined/parameterized
        aggregate functions such as ``sumIf(...)`` or ``quantile(0.5)(x)``."""
        expr = super()._parse_function(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        func = expr.this if isinstance(expr, exp.Window) else expr

        # Aggregate functions can be split in 2 parts: <func_name><suffix>
        parts = (
            self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
        )

        if parts:
            params = self._parse_func_params(func)

            kwargs: t.Dict[str, t.Any] = {
                "this": func.this,
                "expressions": func.expressions,
            }
            if parts[1]:
                kwargs["parts"] = parts
                exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
            else:
                exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

            kwargs["exp_class"] = exp_class
            if params:
                kwargs["params"] = params

            func = self.expression(**kwargs)

            if isinstance(expr, exp.Window):
                # The window's func was parsed as Anonymous in base parser, fix its
                # type to be CH style CombinedAnonymousAggFunc / AnonymousAggFunc
                expr.set("this", func)
            elif params:
                # Params have blocked super()._parse_function() from parsing the following window
                # (if that exists) as they're standing between the function call and the window spec
                expr = self._parse_window(func)
            else:
                expr = func

        return expr

    def _parse_func_params(
        self, this: t.Optional[exp.Func] = None
    ) -> t.Optional[t.List[exp.Expression]]:
        """Parse the trailing parameter list of a parameterized aggregate,
        i.e. the second parenthesized group in ``func(params)(args)``."""
        if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
            return self._parse_csv(self._parse_lambda)

        if self._match(TokenType.L_PAREN):
            params = self._parse_csv(self._parse_lambda)
            self._match_r_paren(this)
            return params

        return None

    def _parse_quantile(self) -> exp.Quantile:
        """Parse quantile(level)(expr); a missing level defaults to 0.5."""
        this = self._parse_lambda()
        params = self._parse_func_params()
        if params:
            return self.expression(exp.Quantile, this=params[0], quantile=this)
        return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        # Wrapping parens are always optional in ClickHouse.
        return super()._parse_wrapped_id_vars(optional=True)

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        # In property position (e.g. after ENGINE), PRIMARY KEY needs no parens.
        return super()._parse_primary_key(
            wrapped_optional=wrapped_optional or in_props, in_props=in_props
        )

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse ON CLUSTER <id>; backtracks if no identifier follows."""
        index = self._index
        if self._match_text_seq("CLUSTER"):
            this = self._parse_id_var()
            if this:
                return self.expression(exp.OnCluster, this=this)
            else:
                self._retreat(index)
        return None

    def _parse_index_constraint(
        self, kind: t.Optional[str] = None
    ) -> exp.IndexColumnConstraint:
        # INDEX name1 expr TYPE type1(args) GRANULARITY value
        this = self._parse_id_var()
        expression = self._parse_assignment()

        index_type = self._match_text_seq("TYPE") and (
            self._parse_function() or self._parse_var()
        )

        granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

        return self.expression(
            exp.IndexColumnConstraint,
            this=this,
            expression=expression,
            index_type=index_type,
            granularity=granularity,
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
        if not self._match(TokenType.PARTITION):
            return None

        if self._match_text_seq("ID"):
            # Corresponds to the PARTITION ID <string_value> syntax
            expressions: t.List[exp.Expression] = [
                self.expression(exp.PartitionId, this=self._parse_string())
            ]
        else:
            expressions = self._parse_expressions()

        return self.expression(exp.Partition, expressions=expressions)

    def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... REPLACE PARTITION <partition> FROM <table>."""
        partition = self._parse_partition()

        if not partition or not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
        )

    def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
        """Parse PROJECTION <name> (<statement>)."""
        if not self._match_text_seq("PROJECTION"):
            return None

        return self.expression(
            exp.ProjectionDef,
            this=self._parse_id_var(),
            expression=self._parse_wrapped(self._parse_statement),
        )

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        # Projections may appear anywhere a table constraint can.
        return super()._parse_constraint() or self._parse_projection_def()
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- PROPERTY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_JSON_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    """ClickHouse-specific SQL generator.

    Maps sqlglot expression nodes back to ClickHouse SQL: camelCase
    function names (``any``, ``uniq``, ``arrayJoin``), native type names
    (``Int64``, ``String``, ``Nullable``), FINAL/SAMPLE/SETTINGS/FORMAT
    clauses, ON CLUSTER DDL, and aggregate/parameterized function forms.
    """

    QUERY_HINTS = False
    STRUCT_DELIMITER = ("(", ")")
    NVL2_SUPPORTED = False
    TABLESAMPLE_REQUIRES_PARENS = False
    TABLESAMPLE_SIZE_IS_ROWS = False
    TABLESAMPLE_KEYWORDS = "SAMPLE"
    LAST_DAY_SUPPORTS_DATE_PART = False
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    EXPLICIT_SET_OP = True
    GROUPINGS_SEP = ""
    SET_OP_MODIFIERS = False

    # All of these are aliases for String in ClickHouse; see datatype_sql.
    STRING_TYPE_MAPPING = {
        exp.DataType.Type.CHAR: "String",
        exp.DataType.Type.LONGBLOB: "String",
        exp.DataType.Type.LONGTEXT: "String",
        exp.DataType.Type.MEDIUMBLOB: "String",
        exp.DataType.Type.MEDIUMTEXT: "String",
        exp.DataType.Type.TINYBLOB: "String",
        exp.DataType.Type.TINYTEXT: "String",
        exp.DataType.Type.TEXT: "String",
        exp.DataType.Type.VARBINARY: "String",
        exp.DataType.Type.VARCHAR: "String",
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        **STRING_TYPE_MAPPING,
        exp.DataType.Type.ARRAY: "Array",
        exp.DataType.Type.BIGINT: "Int64",
        exp.DataType.Type.DATE32: "Date32",
        exp.DataType.Type.DATETIME64: "DateTime64",
        exp.DataType.Type.DOUBLE: "Float64",
        exp.DataType.Type.ENUM: "Enum",
        exp.DataType.Type.ENUM8: "Enum8",
        exp.DataType.Type.ENUM16: "Enum16",
        exp.DataType.Type.FIXEDSTRING: "FixedString",
        exp.DataType.Type.FLOAT: "Float32",
        exp.DataType.Type.INT: "Int32",
        exp.DataType.Type.MEDIUMINT: "Int32",
        exp.DataType.Type.INT128: "Int128",
        exp.DataType.Type.INT256: "Int256",
        exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
        exp.DataType.Type.MAP: "Map",
        exp.DataType.Type.NESTED: "Nested",
        exp.DataType.Type.NULLABLE: "Nullable",
        exp.DataType.Type.SMALLINT: "Int16",
        exp.DataType.Type.STRUCT: "Tuple",
        exp.DataType.Type.TINYINT: "Int8",
        exp.DataType.Type.UBIGINT: "UInt64",
        exp.DataType.Type.UINT: "UInt32",
        exp.DataType.Type.UINT128: "UInt128",
        exp.DataType.Type.UINT256: "UInt256",
        exp.DataType.Type.USMALLINT: "UInt16",
        exp.DataType.Type.UTINYINT: "UInt8",
        exp.DataType.Type.IPV4: "IPv4",
        exp.DataType.Type.IPV6: "IPv6",
        exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
        exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("any"),
        exp.ApproxDistinct: rename_func("uniq"),
        # arrayFilter takes the lambda first, then the array.
        exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
        exp.ArraySize: rename_func("LENGTH"),
        exp.ArraySum: rename_func("arraySum"),
        exp.ArgMax: arg_max_or_min_no_count("argMax"),
        exp.ArgMin: arg_max_or_min_no_count("argMin"),
        exp.Array: inline_array_sql,
        exp.CastToStrType: rename_func("CAST"),
        exp.CountIf: rename_func("countIf"),
        exp.CompressColumnConstraint: lambda self,
        e: f"CODEC({self.expressions(e, key='this', flat=True)})",
        exp.ComputedColumnConstraint: lambda self,
        e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
        exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
        exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
        exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
        exp.DateSub: _datetime_delta_sql("DATE_SUB"),
        exp.Explode: rename_func("arrayJoin"),
        exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
        exp.IsNan: rename_func("isNaN"),
        exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONPathKey: json_path_key_only_name,
        exp.JSONPathRoot: lambda *_: "",
        exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
        exp.Nullif: rename_func("nullIf"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
        exp.Rand: rename_func("randCanonical"),
        exp.Select: transforms.preprocess([transforms.eliminate_qualify]),
        exp.StartsWith: rename_func("startsWith"),
        exp.StrPosition: lambda self, e: self.func(
            "position", e.this, e.args.get("substr"), e.args.get("position")
        ),
        exp.TimeToStr: lambda self, e: self.func(
            "DATE_FORMAT", e.this, self.format_time(e), e.args.get("timezone")
        ),
        exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
        exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
        exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
        exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
        exp.MD5Digest: rename_func("MD5"),
        exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
        exp.UnixToTime: _unix_to_time_sql,
        exp.TimestampTrunc: timestamptrunc_sql(zone=True),
        exp.Variance: rename_func("varSamp"),
        exp.Stddev: rename_func("stddevSamp"),
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.OnCluster: exp.Properties.Location.POST_NAME,
    }

    # there's no list in docs, but it can be found in Clickhouse code
    # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
    ON_CLUSTER_TARGETS = {
        "DATABASE",
        "TABLE",
        "VIEW",
        "DICTIONARY",
        "INDEX",
        "FUNCTION",
        "NAMED COLLECTION",
    }

    def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
        # ClickHouse JSON path subscripts are 1-based; shift integer indexes.
        this = self.json_path_part(expression.this)
        return str(int(this) + 1) if is_int(this) else this

    def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
        # CREATE TABLE ... AS <other_table> (instead of LIKE).
        return f"AS {self.sql(expression, 'this')}"

    def _any_to_has(
        self,
        expression: exp.EQ | exp.NEQ,
        default: t.Callable[[t.Any], str],
        prefix: str = "",
    ) -> str:
        """Rewrite ``x = ANY(arr)`` / ``x <> ANY(arr)`` as (NOT) has(arr, x);
        falls back to `default` when neither side is an exp.Any."""
        if isinstance(expression.left, exp.Any):
            arr = expression.left
            this = expression.right
        elif isinstance(expression.right, exp.Any):
            arr = expression.right
            this = expression.left
        else:
            return default(expression)

        return prefix + self.func("has", arr.this.unnest(), this)

    def eq_sql(self, expression: exp.EQ) -> str:
        return self._any_to_has(expression, super().eq_sql)

    def neq_sql(self, expression: exp.NEQ) -> str:
        return self._any_to_has(expression, super().neq_sql, "NOT ")

    def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
        # Manually add a flag to make the search case-insensitive
        regex = self.func("CONCAT", "'(?i)'", expression.expression)
        return self.func("match", expression.this, regex)

    def datatype_sql(self, expression: exp.DataType) -> str:
        # String is the standard ClickHouse type, every other variant is just an alias.
        # Additionally, any supplied length parameter will be ignored.
        #
        # https://clickhouse.com/docs/en/sql-reference/data-types/string
        if expression.this in self.STRING_TYPE_MAPPING:
            return "String"

        return super().datatype_sql(expression)

    def cte_sql(self, expression: exp.CTE) -> str:
        # Scalar CTEs (WITH <expr> AS <id>, see Parser._parse_cte) render inline.
        if expression.args.get("scalar"):
            this = self.sql(expression, "this")
            alias = self.sql(expression, "alias")
            return f"{this} AS {alias}"

        return super().cte_sql(expression)

    def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
        """Append SETTINGS and FORMAT clauses after LIMIT, when present."""
        return super().after_limit_modifiers(expression) + [
            (
                self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                if expression.args.get("settings")
                else ""
            ),
            (
                self.seg("FORMAT ") + self.sql(expression, "format")
                if expression.args.get("format")
                else ""
            ),
        ]

    def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
        # func(args)(params) -> name(args) followed by the parameter list.
        params = self.expressions(expression, key="params", flat=True)
        return self.func(expression.name, *expression.expressions) + f"({params})"

    def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
        return self.func(expression.name, *expression.expressions)

    def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
        return self.anonymousaggfunc_sql(expression)

    def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
        return self.parameterizedagg_sql(expression)

    def placeholder_sql(self, expression: exp.Placeholder) -> str:
        # Query parameter syntax: {name: Type}.
        return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

    def oncluster_sql(self, expression: exp.OnCluster) -> str:
        return f"ON CLUSTER {self.sql(expression, 'this')}"

    def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
        """Place ON CLUSTER between the object name and its schema for the
        DDL object kinds that support it (see ON_CLUSTER_TARGETS)."""
        if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
            exp.Properties.Location.POST_NAME
        ):
            this_name = self.sql(expression.this, "this")
            this_properties = " ".join(
                [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
            )
            this_schema = self.schema_columns_sql(expression.this)
            return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}"

        return super().createable_sql(expression, locations)

    def prewhere_sql(self, expression: exp.PreWhere) -> str:
        this = self.indent(self.sql(expression, "this"))
        return f"{self.seg('PREWHERE')}{self.sep()}{this}"

    def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
        """Render INDEX <name> <expr> TYPE <type> GRANULARITY <value>,
        omitting whichever parts are absent."""
        this = self.sql(expression, "this")
        this = f" {this}" if this else ""
        expr = self.sql(expression, "expression")
        expr = f" {expr}" if expr else ""
        index_type = self.sql(expression, "index_type")
        index_type = f" TYPE {index_type}" if index_type else ""
        granularity = self.sql(expression, "granularity")
        granularity = f" GRANULARITY {granularity}" if granularity else ""

        return f"INDEX{this}{expr}{index_type}{granularity}"

    def partition_sql(self, expression: exp.Partition) -> str:
        return f"PARTITION {self.expressions(expression, flat=True)}"

    def partitionid_sql(self, expression: exp.PartitionId) -> str:
        return f"ID {self.sql(expression.this)}"

    def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
        return (
            f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
        )

    def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
        return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
853 def datatype_sql(self, expression: exp.DataType) -> str: 854 # String is the standard ClickHouse type, every other variant is just an alias. 855 # Additionally, any supplied length parameter will be ignored. 856 # 857 # https://clickhouse.com/docs/en/sql-reference/data-types/string 858 if expression.this in self.STRING_TYPE_MAPPING: 859 return "String" 860 861 return super().datatype_sql(expression)
871 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 872 return super().after_limit_modifiers(expression) + [ 873 ( 874 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 875 if expression.args.get("settings") 876 else "" 877 ), 878 ( 879 self.seg("FORMAT ") + self.sql(expression, "format") 880 if expression.args.get("format") 881 else "" 882 ), 883 ]
904 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 905 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 906 exp.Properties.Location.POST_NAME 907 ): 908 this_name = self.sql(expression.this, "this") 909 this_properties = " ".join( 910 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 911 ) 912 this_schema = self.schema_columns_sql(expression.this) 913 return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}" 914 915 return super().createable_sql(expression, locations)
921 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 922 this = self.sql(expression, "this") 923 this = f" {this}" if this else "" 924 expr = self.sql(expression, "expression") 925 expr = f" {expr}" if expr else "" 926 index_type = self.sql(expression, "index_type") 927 index_type = f" TYPE {index_type}" if index_type else "" 928 granularity = self.sql(expression, "granularity") 929 granularity = f" GRANULARITY {granularity}" if granularity else "" 930 931 return f"INDEX{this}{expr}{index_type}{granularity}"
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- generateseries_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- length_sql
- strtodate_sql
- strtotime_sql