sqlglot.dialects.snowflake
1from __future__ import annotations 2 3import typing as t 4 5from sqlglot import exp, generator, parser, tokens, transforms 6from sqlglot.dialects.dialect import ( 7 Dialect, 8 NormalizationStrategy, 9 binary_from_function, 10 build_default_decimal_type, 11 build_timestamp_from_parts, 12 date_delta_sql, 13 date_trunc_to_time, 14 datestrtodate_sql, 15 build_formatted_time, 16 if_sql, 17 inline_array_sql, 18 max_or_greatest, 19 min_or_least, 20 rename_func, 21 timestamptrunc_sql, 22 timestrtotime_sql, 23 var_map_sql, 24 map_date_part, 25) 26from sqlglot.helper import flatten, is_float, is_int, seq_get 27from sqlglot.tokens import TokenType 28 29if t.TYPE_CHECKING: 30 from sqlglot._typing import E 31 32 33# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html 34def _build_datetime( 35 name: str, kind: exp.DataType.Type, safe: bool = False 36) -> t.Callable[[t.List], exp.Func]: 37 def _builder(args: t.List) -> exp.Func: 38 value = seq_get(args, 0) 39 int_value = value is not None and is_int(value.name) 40 41 if isinstance(value, exp.Literal): 42 # Converts calls like `TO_TIME('01:02:03')` into casts 43 if len(args) == 1 and value.is_string and not int_value: 44 return exp.cast(value, kind) 45 46 # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special 47 # cases so we can transpile them, since they're relatively common 48 if kind == exp.DataType.Type.TIMESTAMP: 49 if int_value: 50 return exp.UnixToTime(this=value, scale=seq_get(args, 1)) 51 if not is_float(value.this): 52 return build_formatted_time(exp.StrToTime, "snowflake")(args) 53 54 if kind == exp.DataType.Type.DATE and not int_value: 55 formatted_exp = build_formatted_time(exp.TsOrDsToDate, "snowflake")(args) 56 formatted_exp.set("safe", safe) 57 return formatted_exp 58 59 return exp.Anonymous(this=name, expressions=args) 60 61 return _builder 62 63 64def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]: 65 expression = parser.build_var_map(args) 66 67 if isinstance(expression, exp.StarMap): 68 return expression 69 70 return exp.Struct( 71 expressions=[ 72 exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values) 73 ] 74 ) 75 76 77def _build_datediff(args: t.List) -> exp.DateDiff: 78 return exp.DateDiff( 79 this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0)) 80 ) 81 82 83def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]: 84 def _builder(args: t.List) -> E: 85 return expr_type( 86 this=seq_get(args, 2), 87 expression=seq_get(args, 1), 88 unit=map_date_part(seq_get(args, 0)), 89 ) 90 91 return _builder 92 93 94# https://docs.snowflake.com/en/sql-reference/functions/div0 95def _build_if_from_div0(args: t.List) -> exp.If: 96 cond = exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0)) 97 true = exp.Literal.number(0) 98 false = exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1)) 99 return exp.If(this=cond, true=true, false=false) 100 101 102# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull 103def _build_if_from_zeroifnull(args: t.List) -> exp.If: 104 cond = exp.Is(this=seq_get(args, 0), expression=exp.Null()) 105 return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0)) 106 107 108# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull 109def _build_if_from_nullifzero(args: t.List) -> exp.If: 110 cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0)) 111 return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0)) 112 113 114def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str: 115 flag = expression.text("flag") 116 117 if "i" not in flag: 118 flag += "i" 119 120 return self.func( 121 "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag) 122 ) 123 124 125def _build_convert_timezone(args: t.List) -> t.Union[exp.Anonymous, exp.AtTimeZone]: 126 if len(args) == 3: 127 return exp.Anonymous(this="CONVERT_TIMEZONE", expressions=args) 128 return exp.AtTimeZone(this=seq_get(args, 1), zone=seq_get(args, 0)) 129 130 131def _build_regexp_replace(args: t.List) -> exp.RegexpReplace: 132 regexp_replace = exp.RegexpReplace.from_arg_list(args) 133 134 if not regexp_replace.args.get("replacement"): 135 regexp_replace.set("replacement", exp.Literal.string("")) 136 137 return regexp_replace 138 139 140def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]: 141 def _parse(self: Snowflake.Parser) -> exp.Show: 142 return self._parse_show_snowflake(*args, **kwargs) 143 144 return _parse 145 146 147def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc: 148 trunc = date_trunc_to_time(args) 149 trunc.set("unit", map_date_part(trunc.args["unit"])) 150 return trunc 151 152 153def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression: 154 """ 155 Snowflake doesn't allow columns referenced in UNPIVOT to be qualified, 156 so we need to unqualify them. 157 158 Example: 159 >>> from sqlglot import parse_one 160 >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))") 161 >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake")) 162 SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april)) 163 """ 164 if isinstance(expression, exp.Pivot) and expression.unpivot: 165 expression = transforms.unqualify_columns(expression) 166 167 return expression 168 169 170def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression: 171 assert isinstance(expression, exp.Create) 172 173 def _flatten_structured_type(expression: exp.DataType) -> exp.DataType: 174 if expression.this in exp.DataType.NESTED_TYPES: 175 expression.set("expressions", None) 176 return expression 177 178 props = expression.args.get("properties") 179 if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)): 180 for schema_expression in expression.this.expressions: 181 if isinstance(schema_expression, exp.ColumnDef): 182 column_type = schema_expression.kind 183 if isinstance(column_type, exp.DataType): 184 column_type.transform(_flatten_structured_type, copy=False) 185 186 return expression 187 188 189class Snowflake(Dialect): 190 # https://docs.snowflake.com/en/sql-reference/identifiers-syntax 191 NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE 192 NULL_ORDERING = "nulls_are_large" 193 TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'" 194 SUPPORTS_USER_DEFINED_TYPES = False 195 SUPPORTS_SEMI_ANTI_JOIN = False 196 PREFER_CTE_ALIAS_COLUMN = True 197 TABLESAMPLE_SIZE_IS_PERCENT = True 198 COPY_PARAMS_ARE_CSV = False 199 200 TIME_MAPPING = { 201 "YYYY": "%Y", 202 "yyyy": "%Y", 203 "YY": "%y", 204 "yy": "%y", 205 "MMMM": "%B", 206 "mmmm": "%B", 207 "MON": "%b", 208 "mon": "%b", 209 "MM": "%m", 210 "mm": "%m", 211 "DD": "%d", 212 "dd": "%-d", 213 "DY": "%a", 214 "dy": "%w", 215 "HH24": "%H", 216 "hh24": "%H", 217 "HH12": "%I", 218 "hh12": "%I", 219 "MI": "%M", 220 "mi": "%M", 221 "SS": "%S", 222 "ss": "%S", 223 "FF": "%f", 224 "ff": "%f", 225 "FF6": "%f", 226 "ff6": "%f", 227 } 228 229 def quote_identifier(self, expression: E, identify: bool = True) -> E: 230 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 231 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 232 if ( 233 isinstance(expression, exp.Identifier) 234 and isinstance(expression.parent, exp.Table) 235 and expression.name.lower() == "dual" 236 ): 237 return expression # type: ignore 238 239 return super().quote_identifier(expression, identify=identify) 240 241 class Parser(parser.Parser): 242 IDENTIFY_PIVOT_STRINGS = True 243 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 244 COLON_IS_JSON_EXTRACT = True 245 246 ID_VAR_TOKENS = { 247 *parser.Parser.ID_VAR_TOKENS, 248 TokenType.MATCH_CONDITION, 249 } 250 251 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 252 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 253 254 FUNCTIONS = { 255 **parser.Parser.FUNCTIONS, 256 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 257 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 258 "ARRAY_CONSTRUCT": exp.Array.from_arg_list, 259 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 260 this=seq_get(args, 1), expression=seq_get(args, 0) 261 ), 262 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 263 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 264 start=seq_get(args, 0), 265 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 266 step=seq_get(args, 2), 267 ), 268 "BITXOR": binary_from_function(exp.BitwiseXor), 269 "BIT_XOR": binary_from_function(exp.BitwiseXor), 270 "BOOLXOR": binary_from_function(exp.Xor), 271 "CONVERT_TIMEZONE": _build_convert_timezone, 272 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 273 "DATE_TRUNC": _date_trunc_to_time, 274 "DATEADD": _build_date_time_add(exp.DateAdd), 275 "DATEDIFF": _build_datediff, 276 "DIV0": _build_if_from_div0, 277 "FLATTEN": exp.Explode.from_arg_list, 278 "GET_PATH": lambda args, dialect: exp.JSONExtract( 279 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 280 ), 281 "IFF": exp.If.from_arg_list, 282 "LAST_DAY": lambda args: exp.LastDay( 283 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 284 ), 285 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 286 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 287 "LISTAGG": exp.GroupConcat.from_arg_list, 288 "MEDIAN": lambda args: exp.PercentileCont( 289 this=seq_get(args, 0), expression=exp.Literal.number(0.5) 290 ), 291 "NULLIFZERO": _build_if_from_nullifzero, 292 "OBJECT_CONSTRUCT": _build_object_construct, 293 "REGEXP_REPLACE": _build_regexp_replace, 294 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 295 "RLIKE": exp.RegexpLike.from_arg_list, 296 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 297 "TIMEADD": _build_date_time_add(exp.TimeAdd), 298 "TIMEDIFF": _build_datediff, 299 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 300 "TIMESTAMPDIFF": _build_datediff, 301 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 302 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 303 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 304 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 305 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 306 "TO_NUMBER": lambda args: exp.ToNumber( 307 this=seq_get(args, 0), 308 format=seq_get(args, 1), 309 precision=seq_get(args, 2), 310 scale=seq_get(args, 3), 311 ), 312 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 313 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 314 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 315 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 316 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 317 "TO_VARCHAR": exp.ToChar.from_arg_list, 318 "ZEROIFNULL": _build_if_from_zeroifnull, 319 } 320 321 FUNCTION_PARSERS = { 322 **parser.Parser.FUNCTION_PARSERS, 323 "DATE_PART": lambda self: self._parse_date_part(), 324 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 325 } 326 FUNCTION_PARSERS.pop("TRIM") 327 328 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 329 330 RANGE_PARSERS = { 331 **parser.Parser.RANGE_PARSERS, 332 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 333 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 334 } 335 336 ALTER_PARSERS = { 337 **parser.Parser.ALTER_PARSERS, 338 "UNSET": lambda self: self.expression( 339 exp.Set, 340 tag=self._match_text_seq("TAG"), 341 expressions=self._parse_csv(self._parse_id_var), 342 unset=True, 343 ), 344 "SWAP": lambda self: self._parse_alter_table_swap(), 345 } 346 347 STATEMENT_PARSERS = { 348 **parser.Parser.STATEMENT_PARSERS, 349 TokenType.SHOW: lambda self: self._parse_show(), 350 } 351 352 PROPERTY_PARSERS = { 353 **parser.Parser.PROPERTY_PARSERS, 354 "LOCATION": lambda self: self._parse_location_property(), 355 } 356 357 TYPE_CONVERTERS = { 358 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 359 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 360 } 361 362 SHOW_PARSERS = { 363 "SCHEMAS": _show_parser("SCHEMAS"), 364 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 365 "OBJECTS": _show_parser("OBJECTS"), 366 "TERSE OBJECTS": _show_parser("OBJECTS"), 367 "TABLES": _show_parser("TABLES"), 368 "TERSE TABLES": _show_parser("TABLES"), 369 "VIEWS": _show_parser("VIEWS"), 370 "TERSE VIEWS": _show_parser("VIEWS"), 371 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 372 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 373 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 374 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 375 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 376 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 377 "SEQUENCES": _show_parser("SEQUENCES"), 378 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 379 "COLUMNS": _show_parser("COLUMNS"), 380 "USERS": _show_parser("USERS"), 381 "TERSE USERS": _show_parser("USERS"), 382 } 383 384 CONSTRAINT_PARSERS = { 385 **parser.Parser.CONSTRAINT_PARSERS, 386 "WITH": lambda self: self._parse_with_constraint(), 387 "MASKING": lambda self: self._parse_with_constraint(), 388 "PROJECTION": lambda self: self._parse_with_constraint(), 389 "TAG": lambda self: self._parse_with_constraint(), 390 } 391 392 STAGED_FILE_SINGLE_TOKENS = { 393 TokenType.DOT, 394 TokenType.MOD, 395 TokenType.SLASH, 396 } 397 398 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 399 400 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 401 402 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 403 404 LAMBDAS = { 405 **parser.Parser.LAMBDAS, 406 TokenType.ARROW: lambda self, expressions: self.expression( 407 exp.Lambda, 408 this=self._replace_lambda( 409 self._parse_assignment(), 410 expressions, 411 ), 412 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 413 ), 414 } 415 416 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 417 if self._prev.token_type != TokenType.WITH: 418 self._retreat(self._index - 1) 419 420 if self._match_text_seq("MASKING", "POLICY"): 421 policy = self._parse_column() 422 return self.expression( 423 exp.MaskingPolicyColumnConstraint, 424 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 425 expressions=self._match(TokenType.USING) 426 and self._parse_wrapped_csv(self._parse_id_var), 427 ) 428 if self._match_text_seq("PROJECTION", "POLICY"): 429 policy = self._parse_column() 430 return self.expression( 431 exp.ProjectionPolicyColumnConstraint, 432 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 433 ) 434 if self._match(TokenType.TAG): 435 return self.expression( 436 exp.TagColumnConstraint, 437 expressions=self._parse_wrapped_csv(self._parse_property), 438 ) 439 440 return None 441 442 def _parse_create(self) -> exp.Create | exp.Command: 443 expression = super()._parse_create() 444 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 445 # Replace the Table node with the enclosed Identifier 446 expression.this.replace(expression.this.this) 447 448 return expression 449 450 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 451 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 452 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 453 this = self._parse_var() or self._parse_type() 454 455 if not this: 456 return None 457 458 self._match(TokenType.COMMA) 459 expression = self._parse_bitwise() 460 this = map_date_part(this) 461 name = this.name.upper() 462 463 if name.startswith("EPOCH"): 464 if name == "EPOCH_MILLISECOND": 465 scale = 10**3 466 elif name == "EPOCH_MICROSECOND": 467 scale = 10**6 468 elif name == "EPOCH_NANOSECOND": 469 scale = 10**9 470 else: 471 scale = None 472 473 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 474 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 475 476 if scale: 477 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 478 479 return to_unix 480 481 return self.expression(exp.Extract, this=this, expression=expression) 482 483 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 484 if is_map: 485 # Keys are strings in Snowflake's objects, see also: 486 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 487 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 488 return self._parse_slice(self._parse_string()) 489 490 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 491 492 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 493 lateral = super()._parse_lateral() 494 if not lateral: 495 return lateral 496 497 if isinstance(lateral.this, exp.Explode): 498 table_alias = lateral.args.get("alias") 499 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 500 if table_alias and not table_alias.args.get("columns"): 501 table_alias.set("columns", columns) 502 elif not table_alias: 503 exp.alias_(lateral, "_flattened", table=columns, copy=False) 504 505 return lateral 506 507 def _parse_at_before(self, table: exp.Table) -> exp.Table: 508 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 509 index = self._index 510 if self._match_texts(("AT", "BEFORE")): 511 this = self._prev.text.upper() 512 kind = ( 513 self._match(TokenType.L_PAREN) 514 and self._match_texts(self.HISTORICAL_DATA_KIND) 515 and self._prev.text.upper() 516 ) 517 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 518 519 if expression: 520 self._match_r_paren() 521 when = self.expression( 522 exp.HistoricalData, this=this, kind=kind, expression=expression 523 ) 524 table.set("when", when) 525 else: 526 self._retreat(index) 527 528 return table 529 530 def _parse_table_parts( 531 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 532 ) -> exp.Table: 533 # https://docs.snowflake.com/en/user-guide/querying-stage 534 if self._match(TokenType.STRING, advance=False): 535 table = self._parse_string() 536 elif self._match_text_seq("@", advance=False): 537 table = self._parse_location_path() 538 else: 539 table = None 540 541 if table: 542 file_format = None 543 pattern = None 544 545 wrapped = self._match(TokenType.L_PAREN) 546 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 547 if self._match_text_seq("FILE_FORMAT", "=>"): 548 file_format = self._parse_string() or super()._parse_table_parts( 549 is_db_reference=is_db_reference 550 ) 551 elif self._match_text_seq("PATTERN", "=>"): 552 pattern = self._parse_string() 553 else: 554 break 555 556 self._match(TokenType.COMMA) 557 558 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 559 else: 560 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 561 562 return self._parse_at_before(table) 563 564 def _parse_id_var( 565 self, 566 any_token: bool = True, 567 tokens: t.Optional[t.Collection[TokenType]] = None, 568 ) -> t.Optional[exp.Expression]: 569 if self._match_text_seq("IDENTIFIER", "("): 570 identifier = ( 571 super()._parse_id_var(any_token=any_token, tokens=tokens) 572 or self._parse_string() 573 ) 574 self._match_r_paren() 575 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 576 577 return super()._parse_id_var(any_token=any_token, tokens=tokens) 578 579 def _parse_show_snowflake(self, this: str) -> exp.Show: 580 scope = None 581 scope_kind = None 582 583 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 584 # which is syntactically valid but has no effect on the output 585 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 586 587 history = self._match_text_seq("HISTORY") 588 589 like = self._parse_string() if self._match(TokenType.LIKE) else None 590 591 if self._match(TokenType.IN): 592 if self._match_text_seq("ACCOUNT"): 593 scope_kind = "ACCOUNT" 594 elif self._match_set(self.DB_CREATABLES): 595 scope_kind = self._prev.text.upper() 596 if self._curr: 597 scope = self._parse_table_parts() 598 elif self._curr: 599 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 600 scope = self._parse_table_parts() 601 602 return self.expression( 603 exp.Show, 604 **{ 605 "terse": terse, 606 "this": this, 607 "history": history, 608 "like": like, 609 "scope": scope, 610 "scope_kind": scope_kind, 611 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 612 "limit": self._parse_limit(), 613 "from": self._parse_string() if self._match(TokenType.FROM) else None, 614 }, 615 ) 616 617 def _parse_alter_table_swap(self) -> exp.SwapTable: 618 self._match_text_seq("WITH") 619 return self.expression(exp.SwapTable, this=self._parse_table(schema=True)) 620 621 def _parse_location_property(self) -> exp.LocationProperty: 622 self._match(TokenType.EQ) 623 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 624 625 def _parse_file_location(self) -> t.Optional[exp.Expression]: 626 # Parse either a subquery or a staged file 627 return ( 628 self._parse_select(table=True, parse_subquery_alias=False) 629 if self._match(TokenType.L_PAREN, advance=False) 630 else self._parse_table_parts() 631 ) 632 633 def _parse_location_path(self) -> exp.Var: 634 parts = [self._advance_any(ignore_reserved=True)] 635 636 # We avoid consuming a comma token because external tables like @foo and @bar 637 # can be joined in a query with a comma separator, as well as closing paren 638 # in case of subqueries 639 while self._is_connected() and not self._match_set( 640 (TokenType.COMMA, TokenType.R_PAREN), advance=False 641 ): 642 parts.append(self._advance_any(ignore_reserved=True)) 643 644 return exp.var("".join(part.text for part in parts if part)) 645 646 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 647 this = super()._parse_lambda_arg() 648 649 if not this: 650 return this 651 652 typ = self._parse_types() 653 654 if typ: 655 return self.expression(exp.Cast, this=this, to=typ) 656 657 return this 658 659 class Tokenizer(tokens.Tokenizer): 660 STRING_ESCAPES = ["\\", "'"] 661 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 662 RAW_STRINGS = ["$$"] 663 COMMENTS = ["--", "//", ("/*", "*/")] 664 665 KEYWORDS = { 666 **tokens.Tokenizer.KEYWORDS, 667 "BYTEINT": TokenType.INT, 668 "CHAR VARYING": TokenType.VARCHAR, 669 "CHARACTER VARYING": TokenType.VARCHAR, 670 "EXCLUDE": TokenType.EXCEPT, 671 "ILIKE ANY": TokenType.ILIKE_ANY, 672 "LIKE ANY": TokenType.LIKE_ANY, 673 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 674 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 675 "MINUS": TokenType.EXCEPT, 676 "NCHAR VARYING": TokenType.VARCHAR, 677 "PUT": TokenType.COMMAND, 678 "REMOVE": TokenType.COMMAND, 679 "RM": TokenType.COMMAND, 680 "SAMPLE": TokenType.TABLE_SAMPLE, 681 "SQL_DOUBLE": TokenType.DOUBLE, 682 "SQL_VARCHAR": TokenType.VARCHAR, 683 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 684 "TAG": TokenType.TAG, 685 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 686 "TOP": TokenType.TOP, 687 "WAREHOUSE": TokenType.WAREHOUSE, 688 "STREAMLIT": TokenType.STREAMLIT, 689 } 690 KEYWORDS.pop("/*+") 691 692 SINGLE_TOKENS = { 693 **tokens.Tokenizer.SINGLE_TOKENS, 694 "$": TokenType.PARAMETER, 695 } 696 697 VAR_SINGLE_TOKENS = {"$"} 698 699 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 700 701 class Generator(generator.Generator): 702 PARAMETER_TOKEN = "$" 703 MATCHED_BY_SOURCE = False 704 SINGLE_STRING_INTERVAL = True 705 JOIN_HINTS = False 706 TABLE_HINTS = False 707 QUERY_HINTS = False 708 AGGREGATE_FILTER_SUPPORTED = False 709 SUPPORTS_TABLE_COPY = False 710 COLLATE_IS_FUNC = True 711 LIMIT_ONLY_LITERALS = True 712 JSON_KEY_VALUE_PAIR_SEP = "," 713 INSERT_OVERWRITE = " OVERWRITE INTO" 714 STRUCT_DELIMITER = ("(", ")") 715 COPY_PARAMS_ARE_WRAPPED = False 716 COPY_PARAMS_EQ_REQUIRED = True 717 STAR_EXCEPT = "EXCLUDE" 718 719 TRANSFORMS = { 720 **generator.Generator.TRANSFORMS, 721 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 722 exp.ArgMax: rename_func("MAX_BY"), 723 exp.ArgMin: rename_func("MIN_BY"), 724 exp.Array: inline_array_sql, 725 exp.ArrayConcat: rename_func("ARRAY_CAT"), 726 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 727 exp.AtTimeZone: lambda self, e: self.func( 728 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 729 ), 730 exp.BitwiseXor: rename_func("BITXOR"), 731 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 732 exp.DateAdd: date_delta_sql("DATEADD"), 733 exp.DateDiff: date_delta_sql("DATEDIFF"), 734 exp.DateStrToDate: datestrtodate_sql, 735 exp.DayOfMonth: rename_func("DAYOFMONTH"), 736 exp.DayOfWeek: rename_func("DAYOFWEEK"), 737 exp.DayOfYear: rename_func("DAYOFYEAR"), 738 exp.Explode: rename_func("FLATTEN"), 739 exp.Extract: rename_func("DATE_PART"), 740 exp.FromTimeZone: lambda self, e: self.func( 741 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 742 ), 743 exp.GenerateSeries: lambda self, e: self.func( 744 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 745 ), 746 exp.GroupConcat: rename_func("LISTAGG"), 747 exp.If: if_sql(name="IFF", false_value="NULL"), 748 exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression), 749 exp.JSONExtractScalar: lambda self, e: self.func( 750 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 751 ), 752 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 753 exp.JSONPathRoot: lambda *_: "", 754 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 755 exp.LogicalOr: rename_func("BOOLOR_AGG"), 756 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 757 exp.Max: max_or_greatest, 758 exp.Min: min_or_least, 759 exp.ParseJSON: lambda self, e: self.func( 760 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 761 ), 762 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 763 exp.PercentileCont: transforms.preprocess( 764 [transforms.add_within_group_for_percentiles] 765 ), 766 exp.PercentileDisc: transforms.preprocess( 767 [transforms.add_within_group_for_percentiles] 768 ), 769 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 770 exp.RegexpILike: _regexpilike_sql, 771 exp.Rand: rename_func("RANDOM"), 772 exp.Select: transforms.preprocess( 773 [ 774 transforms.eliminate_distinct_on, 775 transforms.explode_to_unnest(), 776 transforms.eliminate_semi_and_anti_joins, 777 ] 778 ), 779 exp.SHA: rename_func("SHA1"), 780 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 781 exp.StartsWith: rename_func("STARTSWITH"), 782 exp.StrPosition: lambda self, e: self.func( 783 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 784 ), 785 exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), 786 exp.Stuff: rename_func("INSERT"), 787 exp.TimeAdd: date_delta_sql("TIMEADD"), 788 exp.TimestampDiff: lambda self, e: self.func( 789 "TIMESTAMPDIFF", e.unit, e.expression, e.this 790 ), 791 exp.TimestampTrunc: timestamptrunc_sql(), 792 exp.TimeStrToTime: timestrtotime_sql, 793 exp.TimeToStr: lambda self, e: self.func( 794 "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e) 795 ), 796 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 797 exp.ToArray: rename_func("TO_ARRAY"), 798 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 799 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 800 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 801 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 802 exp.TsOrDsToDate: lambda self, e: self.func( 803 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 804 ), 805 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 806 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 807 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 808 exp.Xor: rename_func("BOOLXOR"), 809 } 810 811 SUPPORTED_JSON_PATH_PARTS = { 812 exp.JSONPathKey, 813 exp.JSONPathRoot, 814 exp.JSONPathSubscript, 815 } 816 817 TYPE_MAPPING = { 818 **generator.Generator.TYPE_MAPPING, 819 exp.DataType.Type.NESTED: "OBJECT", 820 exp.DataType.Type.STRUCT: "OBJECT", 821 } 822 823 PROPERTIES_LOCATION = { 824 **generator.Generator.PROPERTIES_LOCATION, 825 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 826 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 827 } 828 829 UNSUPPORTED_VALUES_EXPRESSIONS = { 830 exp.Map, 831 exp.StarMap, 832 exp.Struct, 833 exp.VarMap, 834 } 835 836 def with_properties(self, properties: exp.Properties) -> str: 837 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 838 839 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 840 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 841 values_as_table = False 842 843 return super().values_sql(expression, values_as_table=values_as_table) 844 845 def datatype_sql(self, expression: exp.DataType) -> str: 846 expressions = expression.expressions 847 if ( 848 expressions 849 and expression.is_type(*exp.DataType.STRUCT_TYPES) 850 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 851 ): 852 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 853 return "OBJECT" 854 855 return super().datatype_sql(expression) 856 857 def tonumber_sql(self, expression: exp.ToNumber) -> str: 858 return self.func( 859 "TO_NUMBER", 860 expression.this, 861 expression.args.get("format"), 862 expression.args.get("precision"), 863 expression.args.get("scale"), 864 ) 865 866 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 867 milli = expression.args.get("milli") 868 if milli is not None: 869 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 870 expression.set("nano", milli_to_nano) 871 872 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 873 874 def trycast_sql(self, expression: exp.TryCast) -> str: 875 value = expression.this 876 877 if value.type is None: 878 from sqlglot.optimizer.annotate_types import annotate_types 879 880 value = annotate_types(value) 881 882 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 883 return super().trycast_sql(expression) 884 885 # TRY_CAST only works for string values in Snowflake 886 return self.cast_sql(expression) 887 888 def log_sql(self, expression: exp.Log) -> str: 889 if not expression.expression: 890 return self.func("LN", expression.this) 891 892 return super().log_sql(expression) 893 894 def unnest_sql(self, expression: exp.Unnest) -> str: 895 unnest_alias = expression.args.get("alias") 896 offset = expression.args.get("offset") 897 898 columns = [ 899 exp.to_identifier("seq"), 900 exp.to_identifier("key"), 901 exp.to_identifier("path"), 902 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 903 seq_get(unnest_alias.columns if unnest_alias else [], 0) 904 or exp.to_identifier("value"), 905 exp.to_identifier("this"), 906 ] 907 908 if unnest_alias: 909 unnest_alias.set("columns", columns) 910 else: 911 unnest_alias = exp.TableAlias(this="_u", columns=columns) 912 913 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 914 alias = self.sql(unnest_alias) 915 alias = f" AS {alias}" if alias else "" 916 return f"{explode}{alias}" 917 918 def show_sql(self, expression: exp.Show) -> str: 919 terse = "TERSE " if expression.args.get("terse") else "" 920 history = " HISTORY" if expression.args.get("history") else "" 921 like = self.sql(expression, "like") 922 like = f" LIKE {like}" if like else "" 923 924 scope = self.sql(expression, "scope") 925 scope = f" {scope}" if scope else "" 926 927 scope_kind = self.sql(expression, "scope_kind") 928 if scope_kind: 929 scope_kind = f" IN {scope_kind}" 930 931 starts_with = self.sql(expression, "starts_with") 932 if starts_with: 933 starts_with = f" STARTS WITH {starts_with}" 934 935 limit = self.sql(expression, "limit") 936 937 from_ = self.sql(expression, "from") 938 if from_: 939 from_ = f" FROM {from_}" 940 941 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 942 943 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 944 # Other dialects don't support all of the following parameters, so we need to 945 # generate default values as necessary to ensure the transpilation is correct 946 group = expression.args.get("group") 947 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 948 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 949 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 950 951 return self.func( 952 "REGEXP_SUBSTR", 953 expression.this, 954 expression.expression, 955 position, 956 occurrence, 957 parameters, 958 group, 959 ) 960 961 def except_op(self, expression: exp.Except) -> str: 962 if not expression.args.get("distinct"): 963 self.unsupported("EXCEPT with All is not supported in Snowflake") 964 return super().except_op(expression) 965 966 def intersect_op(self, expression: exp.Intersect) -> str: 967 if not expression.args.get("distinct"): 968 self.unsupported("INTERSECT with All is not supported in Snowflake") 969 return super().intersect_op(expression) 970 971 def describe_sql(self, expression: exp.Describe) -> str: 972 # Default to table if kind is unknown 973 kind_value = expression.args.get("kind") or "TABLE" 974 kind = f" {kind_value}" if kind_value else "" 975 this = f" {self.sql(expression, 'this')}" 976 expressions = self.expressions(expression, flat=True) 977 expressions = f" {expressions}" if expressions else "" 978 return f"DESCRIBE{kind}{this}{expressions}" 979 980 def generatedasidentitycolumnconstraint_sql( 981 self, expression: exp.GeneratedAsIdentityColumnConstraint 982 ) -> str: 983 start = expression.args.get("start") 984 start = f" START {start}" if start else "" 985 increment = expression.args.get("increment") 986 increment = f" INCREMENT {increment}" if increment else "" 987 return f"AUTOINCREMENT{start}{increment}" 988 989 def swaptable_sql(self, expression: exp.SwapTable) -> str: 990 this = self.sql(expression, "this") 991 return f"SWAP WITH {this}" 992 993 def cluster_sql(self, expression: exp.Cluster) -> str: 994 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 995 996 def struct_sql(self, expression: exp.Struct) -> str: 997 keys = [] 998 values = [] 999 1000 for i, e in enumerate(expression.expressions): 1001 if isinstance(e, exp.PropertyEQ): 1002 keys.append( 1003 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1004 ) 1005 values.append(e.expression) 1006 else: 1007 keys.append(exp.Literal.string(f"_{i}")) 1008 values.append(e) 1009 1010 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1011 1012 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1013 if expression.args.get("weight") or expression.args.get("accuracy"): 1014 self.unsupported( 1015 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 1016 ) 1017 1018 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1019 1020 def alterset_sql(self, expression: exp.AlterSet) -> str: 1021 exprs = self.expressions(expression, flat=True) 1022 exprs = f" {exprs}" if exprs else "" 1023 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1024 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1025 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1026 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1027 tag = self.expressions(expression, key="tag", flat=True) 1028 tag = f" TAG {tag}" if tag else "" 1029 1030 return f"SET{exprs}{file_format}{copy_options}{tag}"
190class Snowflake(Dialect): 191 # https://docs.snowflake.com/en/sql-reference/identifiers-syntax 192 NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE 193 NULL_ORDERING = "nulls_are_large" 194 TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'" 195 SUPPORTS_USER_DEFINED_TYPES = False 196 SUPPORTS_SEMI_ANTI_JOIN = False 197 PREFER_CTE_ALIAS_COLUMN = True 198 TABLESAMPLE_SIZE_IS_PERCENT = True 199 COPY_PARAMS_ARE_CSV = False 200 201 TIME_MAPPING = { 202 "YYYY": "%Y", 203 "yyyy": "%Y", 204 "YY": "%y", 205 "yy": "%y", 206 "MMMM": "%B", 207 "mmmm": "%B", 208 "MON": "%b", 209 "mon": "%b", 210 "MM": "%m", 211 "mm": "%m", 212 "DD": "%d", 213 "dd": "%-d", 214 "DY": "%a", 215 "dy": "%w", 216 "HH24": "%H", 217 "hh24": "%H", 218 "HH12": "%I", 219 "hh12": "%I", 220 "MI": "%M", 221 "mi": "%M", 222 "SS": "%S", 223 "ss": "%S", 224 "FF": "%f", 225 "ff": "%f", 226 "FF6": "%f", 227 "ff6": "%f", 228 } 229 230 def quote_identifier(self, expression: E, identify: bool = True) -> E: 231 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 232 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 233 if ( 234 isinstance(expression, exp.Identifier) 235 and isinstance(expression.parent, exp.Table) 236 and expression.name.lower() == "dual" 237 ): 238 return expression # type: ignore 239 240 return super().quote_identifier(expression, identify=identify) 241 242 class Parser(parser.Parser): 243 IDENTIFY_PIVOT_STRINGS = True 244 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 245 COLON_IS_JSON_EXTRACT = True 246 247 ID_VAR_TOKENS = { 248 *parser.Parser.ID_VAR_TOKENS, 249 TokenType.MATCH_CONDITION, 250 } 251 252 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 253 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 254 255 FUNCTIONS = { 256 **parser.Parser.FUNCTIONS, 257 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 258 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 259 "ARRAY_CONSTRUCT": exp.Array.from_arg_list, 260 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 261 this=seq_get(args, 1), expression=seq_get(args, 0) 262 ), 263 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 264 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 265 start=seq_get(args, 0), 266 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 267 step=seq_get(args, 2), 268 ), 269 "BITXOR": binary_from_function(exp.BitwiseXor), 270 "BIT_XOR": binary_from_function(exp.BitwiseXor), 271 "BOOLXOR": binary_from_function(exp.Xor), 272 "CONVERT_TIMEZONE": _build_convert_timezone, 273 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 274 "DATE_TRUNC": _date_trunc_to_time, 275 "DATEADD": _build_date_time_add(exp.DateAdd), 276 "DATEDIFF": _build_datediff, 277 "DIV0": _build_if_from_div0, 278 "FLATTEN": exp.Explode.from_arg_list, 279 "GET_PATH": lambda args, dialect: exp.JSONExtract( 280 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 281 ), 282 "IFF": exp.If.from_arg_list, 283 "LAST_DAY": lambda args: exp.LastDay( 284 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 285 ), 286 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 287 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 288 "LISTAGG": exp.GroupConcat.from_arg_list, 289 "MEDIAN": lambda args: exp.PercentileCont( 290 this=seq_get(args, 0), expression=exp.Literal.number(0.5) 291 ), 292 "NULLIFZERO": _build_if_from_nullifzero, 293 "OBJECT_CONSTRUCT": _build_object_construct, 294 "REGEXP_REPLACE": _build_regexp_replace, 295 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 296 "RLIKE": exp.RegexpLike.from_arg_list, 297 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 298 "TIMEADD": _build_date_time_add(exp.TimeAdd), 299 "TIMEDIFF": _build_datediff, 300 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 301 "TIMESTAMPDIFF": _build_datediff, 302 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 303 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 304 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 305 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 306 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 307 "TO_NUMBER": lambda args: exp.ToNumber( 308 this=seq_get(args, 0), 309 format=seq_get(args, 1), 310 precision=seq_get(args, 2), 311 scale=seq_get(args, 3), 312 ), 313 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 314 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 315 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 316 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 317 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 318 "TO_VARCHAR": exp.ToChar.from_arg_list, 319 "ZEROIFNULL": _build_if_from_zeroifnull, 320 } 321 322 FUNCTION_PARSERS = { 323 **parser.Parser.FUNCTION_PARSERS, 324 "DATE_PART": lambda self: self._parse_date_part(), 325 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 326 } 327 FUNCTION_PARSERS.pop("TRIM") 328 329 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 330 331 RANGE_PARSERS = { 332 **parser.Parser.RANGE_PARSERS, 333 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 334 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 335 } 336 337 ALTER_PARSERS = { 338 **parser.Parser.ALTER_PARSERS, 339 "UNSET": lambda self: self.expression( 340 exp.Set, 341 tag=self._match_text_seq("TAG"), 342 expressions=self._parse_csv(self._parse_id_var), 343 unset=True, 344 ), 345 "SWAP": lambda self: self._parse_alter_table_swap(), 346 } 347 348 STATEMENT_PARSERS = { 349 **parser.Parser.STATEMENT_PARSERS, 350 TokenType.SHOW: lambda self: self._parse_show(), 351 } 352 353 PROPERTY_PARSERS = { 354 **parser.Parser.PROPERTY_PARSERS, 355 "LOCATION": lambda self: self._parse_location_property(), 356 } 357 358 TYPE_CONVERTERS = { 359 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 360 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 361 } 362 363 SHOW_PARSERS = { 364 "SCHEMAS": _show_parser("SCHEMAS"), 365 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 366 "OBJECTS": _show_parser("OBJECTS"), 367 "TERSE OBJECTS": _show_parser("OBJECTS"), 368 "TABLES": _show_parser("TABLES"), 369 "TERSE TABLES": _show_parser("TABLES"), 370 "VIEWS": _show_parser("VIEWS"), 371 "TERSE VIEWS": _show_parser("VIEWS"), 372 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 373 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 374 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 375 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 376 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 377 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 378 "SEQUENCES": _show_parser("SEQUENCES"), 379 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 380 "COLUMNS": _show_parser("COLUMNS"), 381 "USERS": _show_parser("USERS"), 382 "TERSE USERS": _show_parser("USERS"), 383 } 384 385 CONSTRAINT_PARSERS = { 386 **parser.Parser.CONSTRAINT_PARSERS, 387 "WITH": lambda self: self._parse_with_constraint(), 388 "MASKING": lambda self: self._parse_with_constraint(), 389 "PROJECTION": lambda self: self._parse_with_constraint(), 390 "TAG": lambda self: self._parse_with_constraint(), 391 } 392 393 STAGED_FILE_SINGLE_TOKENS = { 394 TokenType.DOT, 395 TokenType.MOD, 396 TokenType.SLASH, 397 } 398 399 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 400 401 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 402 403 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 404 405 LAMBDAS = { 406 **parser.Parser.LAMBDAS, 407 TokenType.ARROW: lambda self, expressions: self.expression( 408 exp.Lambda, 409 this=self._replace_lambda( 410 self._parse_assignment(), 411 expressions, 412 ), 413 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 414 ), 415 } 416 417 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 418 if self._prev.token_type != TokenType.WITH: 419 self._retreat(self._index - 1) 420 421 if self._match_text_seq("MASKING", "POLICY"): 422 policy = self._parse_column() 423 return self.expression( 424 exp.MaskingPolicyColumnConstraint, 425 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 426 expressions=self._match(TokenType.USING) 427 and self._parse_wrapped_csv(self._parse_id_var), 428 ) 429 if self._match_text_seq("PROJECTION", "POLICY"): 430 policy = self._parse_column() 431 return self.expression( 432 exp.ProjectionPolicyColumnConstraint, 433 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 434 ) 435 if self._match(TokenType.TAG): 436 return self.expression( 437 exp.TagColumnConstraint, 438 expressions=self._parse_wrapped_csv(self._parse_property), 439 ) 440 441 return None 442 443 def _parse_create(self) -> exp.Create | exp.Command: 444 expression = super()._parse_create() 445 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 446 # Replace the Table node with the enclosed Identifier 447 expression.this.replace(expression.this.this) 448 449 return expression 450 451 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 452 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 453 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 454 this = self._parse_var() or self._parse_type() 455 456 if not this: 457 return None 458 459 self._match(TokenType.COMMA) 460 expression = self._parse_bitwise() 461 this = map_date_part(this) 462 name = this.name.upper() 463 464 if name.startswith("EPOCH"): 465 if name == "EPOCH_MILLISECOND": 466 scale = 10**3 467 elif name == "EPOCH_MICROSECOND": 468 scale = 10**6 469 elif name == "EPOCH_NANOSECOND": 470 scale = 10**9 471 else: 472 scale = None 473 474 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 475 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 476 477 if scale: 478 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 479 480 return to_unix 481 482 return self.expression(exp.Extract, this=this, expression=expression) 483 484 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 485 if is_map: 486 # Keys are strings in Snowflake's objects, see also: 487 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 488 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 489 return self._parse_slice(self._parse_string()) 490 491 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 492 493 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 494 lateral = super()._parse_lateral() 495 if not lateral: 496 return lateral 497 498 if isinstance(lateral.this, exp.Explode): 499 table_alias = lateral.args.get("alias") 500 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 501 if table_alias and not table_alias.args.get("columns"): 502 table_alias.set("columns", columns) 503 elif not table_alias: 504 exp.alias_(lateral, "_flattened", table=columns, copy=False) 505 506 return lateral 507 508 def _parse_at_before(self, table: exp.Table) -> exp.Table: 509 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 510 index = self._index 511 if self._match_texts(("AT", "BEFORE")): 512 this = self._prev.text.upper() 513 kind = ( 514 self._match(TokenType.L_PAREN) 515 and self._match_texts(self.HISTORICAL_DATA_KIND) 516 and self._prev.text.upper() 517 ) 518 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 519 520 if expression: 521 self._match_r_paren() 522 when = self.expression( 523 exp.HistoricalData, this=this, kind=kind, expression=expression 524 ) 525 table.set("when", when) 526 else: 527 self._retreat(index) 528 529 return table 530 531 def _parse_table_parts( 532 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 533 ) -> exp.Table: 534 # https://docs.snowflake.com/en/user-guide/querying-stage 535 if self._match(TokenType.STRING, advance=False): 536 table = self._parse_string() 537 elif self._match_text_seq("@", advance=False): 538 table = self._parse_location_path() 539 else: 540 table = None 541 542 if table: 543 file_format = None 544 pattern = None 545 546 wrapped = self._match(TokenType.L_PAREN) 547 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 548 if self._match_text_seq("FILE_FORMAT", "=>"): 549 file_format = self._parse_string() or super()._parse_table_parts( 550 is_db_reference=is_db_reference 551 ) 552 elif self._match_text_seq("PATTERN", "=>"): 553 pattern = self._parse_string() 554 else: 555 break 556 557 self._match(TokenType.COMMA) 558 559 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 560 else: 561 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 562 563 return self._parse_at_before(table) 564 565 def _parse_id_var( 566 self, 567 any_token: bool = True, 568 tokens: t.Optional[t.Collection[TokenType]] = None, 569 ) -> t.Optional[exp.Expression]: 570 if self._match_text_seq("IDENTIFIER", "("): 571 identifier = ( 572 super()._parse_id_var(any_token=any_token, tokens=tokens) 573 or self._parse_string() 574 ) 575 self._match_r_paren() 576 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 577 578 return super()._parse_id_var(any_token=any_token, tokens=tokens) 579 580 def _parse_show_snowflake(self, this: str) -> exp.Show: 581 scope = None 582 scope_kind = None 583 584 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 585 # which is syntactically valid but has no effect on the output 586 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 587 588 history = self._match_text_seq("HISTORY") 589 590 like = self._parse_string() if self._match(TokenType.LIKE) else None 591 592 if self._match(TokenType.IN): 593 if self._match_text_seq("ACCOUNT"): 594 scope_kind = "ACCOUNT" 595 elif self._match_set(self.DB_CREATABLES): 596 scope_kind = self._prev.text.upper() 597 if self._curr: 598 scope = self._parse_table_parts() 599 elif self._curr: 600 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 601 scope = self._parse_table_parts() 602 603 return self.expression( 604 exp.Show, 605 **{ 606 "terse": terse, 607 "this": this, 608 "history": history, 609 "like": like, 610 "scope": scope, 611 "scope_kind": scope_kind, 612 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 613 "limit": self._parse_limit(), 614 "from": self._parse_string() if self._match(TokenType.FROM) else None, 615 }, 616 ) 617 618 def _parse_alter_table_swap(self) -> exp.SwapTable: 619 self._match_text_seq("WITH") 620 return self.expression(exp.SwapTable, this=self._parse_table(schema=True)) 621 622 def _parse_location_property(self) -> exp.LocationProperty: 623 self._match(TokenType.EQ) 624 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 625 626 def _parse_file_location(self) -> t.Optional[exp.Expression]: 627 # Parse either a subquery or a staged file 628 return ( 629 self._parse_select(table=True, parse_subquery_alias=False) 630 if self._match(TokenType.L_PAREN, advance=False) 631 else self._parse_table_parts() 632 ) 633 634 def _parse_location_path(self) -> exp.Var: 635 parts = [self._advance_any(ignore_reserved=True)] 636 637 # We avoid consuming a comma token because external tables like @foo and @bar 638 # can be joined in a query with a comma separator, as well as closing paren 639 # in case of subqueries 640 while self._is_connected() and not self._match_set( 641 (TokenType.COMMA, TokenType.R_PAREN), advance=False 642 ): 643 parts.append(self._advance_any(ignore_reserved=True)) 644 645 return exp.var("".join(part.text for part in parts if part)) 646 647 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 648 this = super()._parse_lambda_arg() 649 650 if not this: 651 return this 652 653 typ = self._parse_types() 654 655 if typ: 656 return self.expression(exp.Cast, this=this, to=typ) 657 658 return this 659 660 class Tokenizer(tokens.Tokenizer): 661 STRING_ESCAPES = ["\\", "'"] 662 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 663 RAW_STRINGS = ["$$"] 664 COMMENTS = ["--", "//", ("/*", "*/")] 665 666 KEYWORDS = { 667 **tokens.Tokenizer.KEYWORDS, 668 "BYTEINT": TokenType.INT, 669 "CHAR VARYING": TokenType.VARCHAR, 670 "CHARACTER VARYING": TokenType.VARCHAR, 671 "EXCLUDE": TokenType.EXCEPT, 672 "ILIKE ANY": TokenType.ILIKE_ANY, 673 "LIKE ANY": TokenType.LIKE_ANY, 674 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 675 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 676 "MINUS": TokenType.EXCEPT, 677 "NCHAR VARYING": TokenType.VARCHAR, 678 "PUT": TokenType.COMMAND, 679 "REMOVE": TokenType.COMMAND, 680 "RM": TokenType.COMMAND, 681 "SAMPLE": TokenType.TABLE_SAMPLE, 682 "SQL_DOUBLE": TokenType.DOUBLE, 683 "SQL_VARCHAR": TokenType.VARCHAR, 684 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 685 "TAG": TokenType.TAG, 686 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 687 "TOP": TokenType.TOP, 688 "WAREHOUSE": TokenType.WAREHOUSE, 689 "STREAMLIT": TokenType.STREAMLIT, 690 } 691 KEYWORDS.pop("/*+") 692 693 SINGLE_TOKENS = { 694 **tokens.Tokenizer.SINGLE_TOKENS, 695 "$": TokenType.PARAMETER, 696 } 697 698 VAR_SINGLE_TOKENS = {"$"} 699 700 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 701 702 class Generator(generator.Generator): 703 PARAMETER_TOKEN = "$" 704 MATCHED_BY_SOURCE = False 705 SINGLE_STRING_INTERVAL = True 706 JOIN_HINTS = False 707 TABLE_HINTS = False 708 QUERY_HINTS = False 709 AGGREGATE_FILTER_SUPPORTED = False 710 SUPPORTS_TABLE_COPY = False 711 COLLATE_IS_FUNC = True 712 LIMIT_ONLY_LITERALS = True 713 JSON_KEY_VALUE_PAIR_SEP = "," 714 INSERT_OVERWRITE = " OVERWRITE INTO" 715 STRUCT_DELIMITER = ("(", ")") 716 COPY_PARAMS_ARE_WRAPPED = False 717 COPY_PARAMS_EQ_REQUIRED = True 718 STAR_EXCEPT = "EXCLUDE" 719 720 TRANSFORMS = { 721 **generator.Generator.TRANSFORMS, 722 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 723 exp.ArgMax: rename_func("MAX_BY"), 724 exp.ArgMin: rename_func("MIN_BY"), 725 exp.Array: inline_array_sql, 726 exp.ArrayConcat: rename_func("ARRAY_CAT"), 727 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 728 exp.AtTimeZone: lambda self, e: self.func( 729 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 730 ), 731 exp.BitwiseXor: rename_func("BITXOR"), 732 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 733 exp.DateAdd: date_delta_sql("DATEADD"), 734 exp.DateDiff: date_delta_sql("DATEDIFF"), 735 exp.DateStrToDate: datestrtodate_sql, 736 exp.DayOfMonth: rename_func("DAYOFMONTH"), 737 exp.DayOfWeek: rename_func("DAYOFWEEK"), 738 exp.DayOfYear: rename_func("DAYOFYEAR"), 739 exp.Explode: rename_func("FLATTEN"), 740 exp.Extract: rename_func("DATE_PART"), 741 exp.FromTimeZone: lambda self, e: self.func( 742 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 743 ), 744 exp.GenerateSeries: lambda self, e: self.func( 745 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 746 ), 747 exp.GroupConcat: rename_func("LISTAGG"), 748 exp.If: if_sql(name="IFF", false_value="NULL"), 749 exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression), 750 exp.JSONExtractScalar: lambda self, e: self.func( 751 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 752 ), 753 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 754 exp.JSONPathRoot: lambda *_: "", 755 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 756 exp.LogicalOr: rename_func("BOOLOR_AGG"), 757 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 758 exp.Max: max_or_greatest, 759 exp.Min: min_or_least, 760 exp.ParseJSON: lambda self, e: self.func( 761 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 762 ), 763 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 764 exp.PercentileCont: transforms.preprocess( 765 [transforms.add_within_group_for_percentiles] 766 ), 767 exp.PercentileDisc: transforms.preprocess( 768 [transforms.add_within_group_for_percentiles] 769 ), 770 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 771 exp.RegexpILike: _regexpilike_sql, 772 exp.Rand: rename_func("RANDOM"), 773 exp.Select: transforms.preprocess( 774 [ 775 transforms.eliminate_distinct_on, 776 transforms.explode_to_unnest(), 777 transforms.eliminate_semi_and_anti_joins, 778 ] 779 ), 780 exp.SHA: rename_func("SHA1"), 781 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 782 exp.StartsWith: rename_func("STARTSWITH"), 783 exp.StrPosition: lambda self, e: self.func( 784 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 785 ), 786 exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), 787 exp.Stuff: rename_func("INSERT"), 788 exp.TimeAdd: date_delta_sql("TIMEADD"), 789 exp.TimestampDiff: lambda self, e: self.func( 790 "TIMESTAMPDIFF", e.unit, e.expression, e.this 791 ), 792 exp.TimestampTrunc: timestamptrunc_sql(), 793 exp.TimeStrToTime: timestrtotime_sql, 794 exp.TimeToStr: lambda self, e: self.func( 795 "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e) 796 ), 797 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 798 exp.ToArray: rename_func("TO_ARRAY"), 799 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 800 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 801 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 802 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 803 exp.TsOrDsToDate: lambda self, e: self.func( 804 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 805 ), 806 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 807 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 808 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 809 exp.Xor: rename_func("BOOLXOR"), 810 } 811 812 SUPPORTED_JSON_PATH_PARTS = { 813 exp.JSONPathKey, 814 exp.JSONPathRoot, 815 exp.JSONPathSubscript, 816 } 817 818 TYPE_MAPPING = { 819 **generator.Generator.TYPE_MAPPING, 820 exp.DataType.Type.NESTED: "OBJECT", 821 exp.DataType.Type.STRUCT: "OBJECT", 822 } 823 824 PROPERTIES_LOCATION = { 825 **generator.Generator.PROPERTIES_LOCATION, 826 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 827 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 828 } 829 830 UNSUPPORTED_VALUES_EXPRESSIONS = { 831 exp.Map, 832 exp.StarMap, 833 exp.Struct, 834 exp.VarMap, 835 } 836 837 def with_properties(self, properties: exp.Properties) -> str: 838 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 839 840 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 841 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 842 values_as_table = False 843 844 return super().values_sql(expression, values_as_table=values_as_table) 845 846 def datatype_sql(self, expression: exp.DataType) -> str: 847 expressions = expression.expressions 848 if ( 849 expressions 850 and expression.is_type(*exp.DataType.STRUCT_TYPES) 851 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 852 ): 853 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 854 return "OBJECT" 855 856 return super().datatype_sql(expression) 857 858 def tonumber_sql(self, expression: exp.ToNumber) -> str: 859 return self.func( 860 "TO_NUMBER", 861 expression.this, 862 expression.args.get("format"), 863 expression.args.get("precision"), 864 expression.args.get("scale"), 865 ) 866 867 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 868 milli = expression.args.get("milli") 869 if milli is not None: 870 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 871 expression.set("nano", milli_to_nano) 872 873 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 874 875 def trycast_sql(self, expression: exp.TryCast) -> str: 876 value = expression.this 877 878 if value.type is None: 879 from sqlglot.optimizer.annotate_types import annotate_types 880 881 value = annotate_types(value) 882 883 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 884 return super().trycast_sql(expression) 885 886 # TRY_CAST only works for string values in Snowflake 887 return self.cast_sql(expression) 888 889 def log_sql(self, expression: exp.Log) -> str: 890 if not expression.expression: 891 return self.func("LN", expression.this) 892 893 return super().log_sql(expression) 894 895 def unnest_sql(self, expression: exp.Unnest) -> str: 896 unnest_alias = expression.args.get("alias") 897 offset = expression.args.get("offset") 898 899 columns = [ 900 exp.to_identifier("seq"), 901 exp.to_identifier("key"), 902 exp.to_identifier("path"), 903 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 904 seq_get(unnest_alias.columns if unnest_alias else [], 0) 905 or exp.to_identifier("value"), 906 exp.to_identifier("this"), 907 ] 908 909 if unnest_alias: 910 unnest_alias.set("columns", columns) 911 else: 912 unnest_alias = exp.TableAlias(this="_u", columns=columns) 913 914 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 915 alias = self.sql(unnest_alias) 916 alias = f" AS {alias}" if alias else "" 917 return f"{explode}{alias}" 918 919 def show_sql(self, expression: exp.Show) -> str: 920 terse = "TERSE " if expression.args.get("terse") else "" 921 history = " HISTORY" if expression.args.get("history") else "" 922 like = self.sql(expression, "like") 923 like = f" LIKE {like}" if like else "" 924 925 scope = self.sql(expression, "scope") 926 scope = f" {scope}" if scope else "" 927 928 scope_kind = self.sql(expression, "scope_kind") 929 if scope_kind: 930 scope_kind = f" IN {scope_kind}" 931 932 starts_with = self.sql(expression, "starts_with") 933 if starts_with: 934 starts_with = f" STARTS WITH {starts_with}" 935 936 limit = self.sql(expression, "limit") 937 938 from_ = self.sql(expression, "from") 939 if from_: 940 from_ = f" FROM {from_}" 941 942 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 943 944 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 945 # Other dialects don't support all of the following parameters, so we need to 946 # generate default values as necessary to ensure the transpilation is correct 947 group = expression.args.get("group") 948 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 949 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 950 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 951 952 return self.func( 953 "REGEXP_SUBSTR", 954 expression.this, 955 expression.expression, 956 position, 957 occurrence, 958 parameters, 959 group, 960 ) 961 962 def except_op(self, expression: exp.Except) -> str: 963 if not expression.args.get("distinct"): 964 self.unsupported("EXCEPT with All is not supported in Snowflake") 965 return super().except_op(expression) 966 967 def intersect_op(self, expression: exp.Intersect) -> str: 968 if not expression.args.get("distinct"): 969 self.unsupported("INTERSECT with All is not supported in Snowflake") 970 return super().intersect_op(expression) 971 972 def describe_sql(self, expression: exp.Describe) -> str: 973 # Default to table if kind is unknown 974 kind_value = expression.args.get("kind") or "TABLE" 975 kind = f" {kind_value}" if kind_value else "" 976 this = f" {self.sql(expression, 'this')}" 977 expressions = self.expressions(expression, flat=True) 978 expressions = f" {expressions}" if expressions else "" 979 return f"DESCRIBE{kind}{this}{expressions}" 980 981 def generatedasidentitycolumnconstraint_sql( 982 self, expression: exp.GeneratedAsIdentityColumnConstraint 983 ) -> str: 984 start = expression.args.get("start") 985 start = f" START {start}" if start else "" 986 increment = expression.args.get("increment") 987 increment = f" INCREMENT {increment}" if increment else "" 988 return f"AUTOINCREMENT{start}{increment}" 989 990 def swaptable_sql(self, expression: exp.SwapTable) -> str: 991 this = self.sql(expression, "this") 992 return f"SWAP WITH {this}" 993 994 def cluster_sql(self, expression: exp.Cluster) -> str: 995 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 996 997 def struct_sql(self, expression: exp.Struct) -> str: 998 keys = [] 999 values = [] 1000 1001 for i, e in enumerate(expression.expressions): 1002 if isinstance(e, exp.PropertyEQ): 1003 keys.append( 1004 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1005 ) 1006 values.append(e.expression) 1007 else: 1008 keys.append(exp.Literal.string(f"_{i}")) 1009 values.append(e) 1010 1011 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1012 1013 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1014 if expression.args.get("weight") or expression.args.get("accuracy"): 1015 self.unsupported( 1016 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 1017 ) 1018 1019 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1020 1021 def alterset_sql(self, expression: exp.AlterSet) -> str: 1022 exprs = self.expressions(expression, flat=True) 1023 exprs = f" {exprs}" if exprs else "" 1024 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1025 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1026 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1027 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1028 tag = self.expressions(expression, key="tag", flat=True) 1029 tag = f" TAG {tag}" if tag else "" 1030 1031 return f"SET{exprs}{file_format}{copy_options}{tag}"
Specifies the strategy according to which identifiers should be normalized.
Default NULL
ordering method to use if not explicitly set.
Possible values: "nulls_are_small"
, "nulls_are_large"
, "nulls_are_last"
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example, WITH y(c) AS ( SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0 ) SELECT c FROM y;
will be rewritten as
WITH y(c) AS (
SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
Associates this dialect's time formats with their equivalent Python strftime
formats.
230 def quote_identifier(self, expression: E, identify: bool = True) -> E: 231 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 232 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 233 if ( 234 isinstance(expression, exp.Identifier) 235 and isinstance(expression.parent, exp.Table) 236 and expression.name.lower() == "dual" 237 ): 238 return expression # type: ignore 239 240 return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an
Identifier
, this method is a no-op. - identify: If set to
False
, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
Mapping of an escaped sequence (\n
) to its unescaped version (
).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- DATE_PART_MAPPING
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
242 class Parser(parser.Parser): 243 IDENTIFY_PIVOT_STRINGS = True 244 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 245 COLON_IS_JSON_EXTRACT = True 246 247 ID_VAR_TOKENS = { 248 *parser.Parser.ID_VAR_TOKENS, 249 TokenType.MATCH_CONDITION, 250 } 251 252 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 253 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 254 255 FUNCTIONS = { 256 **parser.Parser.FUNCTIONS, 257 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 258 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 259 "ARRAY_CONSTRUCT": exp.Array.from_arg_list, 260 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 261 this=seq_get(args, 1), expression=seq_get(args, 0) 262 ), 263 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 264 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 265 start=seq_get(args, 0), 266 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 267 step=seq_get(args, 2), 268 ), 269 "BITXOR": binary_from_function(exp.BitwiseXor), 270 "BIT_XOR": binary_from_function(exp.BitwiseXor), 271 "BOOLXOR": binary_from_function(exp.Xor), 272 "CONVERT_TIMEZONE": _build_convert_timezone, 273 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 274 "DATE_TRUNC": _date_trunc_to_time, 275 "DATEADD": _build_date_time_add(exp.DateAdd), 276 "DATEDIFF": _build_datediff, 277 "DIV0": _build_if_from_div0, 278 "FLATTEN": exp.Explode.from_arg_list, 279 "GET_PATH": lambda args, dialect: exp.JSONExtract( 280 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 281 ), 282 "IFF": exp.If.from_arg_list, 283 "LAST_DAY": lambda args: exp.LastDay( 284 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 285 ), 286 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 287 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 288 "LISTAGG": exp.GroupConcat.from_arg_list, 289 "MEDIAN": lambda args: exp.PercentileCont( 290 this=seq_get(args, 0), expression=exp.Literal.number(0.5) 291 ), 292 "NULLIFZERO": _build_if_from_nullifzero, 293 "OBJECT_CONSTRUCT": _build_object_construct, 294 "REGEXP_REPLACE": _build_regexp_replace, 295 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 296 "RLIKE": exp.RegexpLike.from_arg_list, 297 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 298 "TIMEADD": _build_date_time_add(exp.TimeAdd), 299 "TIMEDIFF": _build_datediff, 300 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 301 "TIMESTAMPDIFF": _build_datediff, 302 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 303 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 304 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 305 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 306 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 307 "TO_NUMBER": lambda args: exp.ToNumber( 308 this=seq_get(args, 0), 309 format=seq_get(args, 1), 310 precision=seq_get(args, 2), 311 scale=seq_get(args, 3), 312 ), 313 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 314 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 315 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 316 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 317 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 318 "TO_VARCHAR": exp.ToChar.from_arg_list, 319 "ZEROIFNULL": _build_if_from_zeroifnull, 320 } 321 322 FUNCTION_PARSERS = { 323 **parser.Parser.FUNCTION_PARSERS, 324 "DATE_PART": lambda self: self._parse_date_part(), 325 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 326 } 327 FUNCTION_PARSERS.pop("TRIM") 328 329 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 330 331 RANGE_PARSERS = { 332 **parser.Parser.RANGE_PARSERS, 333 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 334 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 335 } 336 337 ALTER_PARSERS = { 338 **parser.Parser.ALTER_PARSERS, 339 "UNSET": lambda self: self.expression( 340 exp.Set, 341 tag=self._match_text_seq("TAG"), 342 expressions=self._parse_csv(self._parse_id_var), 343 unset=True, 344 ), 345 "SWAP": lambda self: self._parse_alter_table_swap(), 346 } 347 348 STATEMENT_PARSERS = { 349 **parser.Parser.STATEMENT_PARSERS, 350 TokenType.SHOW: lambda self: self._parse_show(), 351 } 352 353 PROPERTY_PARSERS = { 354 **parser.Parser.PROPERTY_PARSERS, 355 "LOCATION": lambda self: self._parse_location_property(), 356 } 357 358 TYPE_CONVERTERS = { 359 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 360 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 361 } 362 363 SHOW_PARSERS = { 364 "SCHEMAS": _show_parser("SCHEMAS"), 365 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 366 "OBJECTS": _show_parser("OBJECTS"), 367 "TERSE OBJECTS": _show_parser("OBJECTS"), 368 "TABLES": _show_parser("TABLES"), 369 "TERSE TABLES": _show_parser("TABLES"), 370 "VIEWS": _show_parser("VIEWS"), 371 "TERSE VIEWS": _show_parser("VIEWS"), 372 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 373 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 374 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 375 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 376 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 377 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 378 "SEQUENCES": _show_parser("SEQUENCES"), 379 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 380 "COLUMNS": _show_parser("COLUMNS"), 381 "USERS": _show_parser("USERS"), 382 "TERSE USERS": _show_parser("USERS"), 383 } 384 385 CONSTRAINT_PARSERS = { 386 **parser.Parser.CONSTRAINT_PARSERS, 387 "WITH": lambda self: self._parse_with_constraint(), 388 "MASKING": lambda self: self._parse_with_constraint(), 389 "PROJECTION": lambda self: self._parse_with_constraint(), 390 "TAG": lambda self: self._parse_with_constraint(), 391 } 392 393 STAGED_FILE_SINGLE_TOKENS = { 394 TokenType.DOT, 395 TokenType.MOD, 396 TokenType.SLASH, 397 } 398 399 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 400 401 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 402 403 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 404 405 LAMBDAS = { 406 **parser.Parser.LAMBDAS, 407 TokenType.ARROW: lambda self, expressions: self.expression( 408 exp.Lambda, 409 this=self._replace_lambda( 410 self._parse_assignment(), 411 expressions, 412 ), 413 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 414 ), 415 } 416 417 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 418 if self._prev.token_type != TokenType.WITH: 419 self._retreat(self._index - 1) 420 421 if self._match_text_seq("MASKING", "POLICY"): 422 policy = self._parse_column() 423 return self.expression( 424 exp.MaskingPolicyColumnConstraint, 425 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 426 expressions=self._match(TokenType.USING) 427 and self._parse_wrapped_csv(self._parse_id_var), 428 ) 429 if self._match_text_seq("PROJECTION", "POLICY"): 430 policy = self._parse_column() 431 return self.expression( 432 exp.ProjectionPolicyColumnConstraint, 433 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 434 ) 435 if self._match(TokenType.TAG): 436 return self.expression( 437 exp.TagColumnConstraint, 438 expressions=self._parse_wrapped_csv(self._parse_property), 439 ) 440 441 return None 442 443 def _parse_create(self) -> exp.Create | exp.Command: 444 expression = super()._parse_create() 445 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 446 # Replace the Table node with the enclosed Identifier 447 expression.this.replace(expression.this.this) 448 449 return expression 450 451 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 452 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 453 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 454 this = self._parse_var() or self._parse_type() 455 456 if not this: 457 return None 458 459 self._match(TokenType.COMMA) 460 expression = self._parse_bitwise() 461 this = map_date_part(this) 462 name = this.name.upper() 463 464 if name.startswith("EPOCH"): 465 if name == "EPOCH_MILLISECOND": 466 scale = 10**3 467 elif name == "EPOCH_MICROSECOND": 468 scale = 10**6 469 elif name == "EPOCH_NANOSECOND": 470 scale = 10**9 471 else: 472 scale = None 473 474 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 475 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 476 477 if scale: 478 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 479 480 return to_unix 481 482 return self.expression(exp.Extract, this=this, expression=expression) 483 484 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 485 if is_map: 486 # Keys are strings in Snowflake's objects, see also: 487 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 488 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 489 return self._parse_slice(self._parse_string()) 490 491 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 492 493 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 494 lateral = super()._parse_lateral() 495 if not lateral: 496 return lateral 497 498 if isinstance(lateral.this, exp.Explode): 499 table_alias = lateral.args.get("alias") 500 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 501 if table_alias and not table_alias.args.get("columns"): 502 table_alias.set("columns", columns) 503 elif not table_alias: 504 exp.alias_(lateral, "_flattened", table=columns, copy=False) 505 506 return lateral 507 508 def _parse_at_before(self, table: exp.Table) -> exp.Table: 509 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 510 index = self._index 511 if self._match_texts(("AT", "BEFORE")): 512 this = self._prev.text.upper() 513 kind = ( 514 self._match(TokenType.L_PAREN) 515 and self._match_texts(self.HISTORICAL_DATA_KIND) 516 and self._prev.text.upper() 517 ) 518 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 519 520 if expression: 521 self._match_r_paren() 522 when = self.expression( 523 exp.HistoricalData, this=this, kind=kind, expression=expression 524 ) 525 table.set("when", when) 526 else: 527 self._retreat(index) 528 529 return table 530 531 def _parse_table_parts( 532 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 533 ) -> exp.Table: 534 # https://docs.snowflake.com/en/user-guide/querying-stage 535 if self._match(TokenType.STRING, advance=False): 536 table = self._parse_string() 537 elif self._match_text_seq("@", advance=False): 538 table = self._parse_location_path() 539 else: 540 table = None 541 542 if table: 543 file_format = None 544 pattern = None 545 546 wrapped = self._match(TokenType.L_PAREN) 547 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 548 if self._match_text_seq("FILE_FORMAT", "=>"): 549 file_format = self._parse_string() or super()._parse_table_parts( 550 is_db_reference=is_db_reference 551 ) 552 elif self._match_text_seq("PATTERN", "=>"): 553 pattern = self._parse_string() 554 else: 555 break 556 557 self._match(TokenType.COMMA) 558 559 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 560 else: 561 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 562 563 return self._parse_at_before(table) 564 565 def _parse_id_var( 566 self, 567 any_token: bool = True, 568 tokens: t.Optional[t.Collection[TokenType]] = None, 569 ) -> t.Optional[exp.Expression]: 570 if self._match_text_seq("IDENTIFIER", "("): 571 identifier = ( 572 super()._parse_id_var(any_token=any_token, tokens=tokens) 573 or self._parse_string() 574 ) 575 self._match_r_paren() 576 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 577 578 return super()._parse_id_var(any_token=any_token, tokens=tokens) 579 580 def _parse_show_snowflake(self, this: str) -> exp.Show: 581 scope = None 582 scope_kind = None 583 584 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 585 # which is syntactically valid but has no effect on the output 586 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 587 588 history = self._match_text_seq("HISTORY") 589 590 like = self._parse_string() if self._match(TokenType.LIKE) else None 591 592 if self._match(TokenType.IN): 593 if self._match_text_seq("ACCOUNT"): 594 scope_kind = "ACCOUNT" 595 elif self._match_set(self.DB_CREATABLES): 596 scope_kind = self._prev.text.upper() 597 if self._curr: 598 scope = self._parse_table_parts() 599 elif self._curr: 600 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 601 scope = self._parse_table_parts() 602 603 return self.expression( 604 exp.Show, 605 **{ 606 "terse": terse, 607 "this": this, 608 "history": history, 609 "like": like, 610 "scope": scope, 611 "scope_kind": scope_kind, 612 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 613 "limit": self._parse_limit(), 614 "from": self._parse_string() if self._match(TokenType.FROM) else None, 615 }, 616 ) 617 618 def _parse_alter_table_swap(self) -> exp.SwapTable: 619 self._match_text_seq("WITH") 620 return self.expression(exp.SwapTable, this=self._parse_table(schema=True)) 621 622 def _parse_location_property(self) -> exp.LocationProperty: 623 self._match(TokenType.EQ) 624 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 625 626 def _parse_file_location(self) -> t.Optional[exp.Expression]: 627 # Parse either a subquery or a staged file 628 return ( 629 self._parse_select(table=True, parse_subquery_alias=False) 630 if self._match(TokenType.L_PAREN, advance=False) 631 else self._parse_table_parts() 632 ) 633 634 def _parse_location_path(self) -> exp.Var: 635 parts = [self._advance_any(ignore_reserved=True)] 636 637 # We avoid consuming a comma token because external tables like @foo and @bar 638 # can be joined in a query with a comma separator, as well as closing paren 639 # in case of subqueries 640 while self._is_connected() and not self._match_set( 641 (TokenType.COMMA, TokenType.R_PAREN), advance=False 642 ): 643 parts.append(self._advance_any(ignore_reserved=True)) 644 645 return exp.var("".join(part.text for part in parts if part)) 646 647 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 648 this = super()._parse_lambda_arg() 649 650 if not this: 651 return this 652 653 typ = self._parse_types() 654 655 if typ: 656 return self.expression(exp.Cast, this=this, to=typ) 657 658 return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
660 class Tokenizer(tokens.Tokenizer): 661 STRING_ESCAPES = ["\\", "'"] 662 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 663 RAW_STRINGS = ["$$"] 664 COMMENTS = ["--", "//", ("/*", "*/")] 665 666 KEYWORDS = { 667 **tokens.Tokenizer.KEYWORDS, 668 "BYTEINT": TokenType.INT, 669 "CHAR VARYING": TokenType.VARCHAR, 670 "CHARACTER VARYING": TokenType.VARCHAR, 671 "EXCLUDE": TokenType.EXCEPT, 672 "ILIKE ANY": TokenType.ILIKE_ANY, 673 "LIKE ANY": TokenType.LIKE_ANY, 674 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 675 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 676 "MINUS": TokenType.EXCEPT, 677 "NCHAR VARYING": TokenType.VARCHAR, 678 "PUT": TokenType.COMMAND, 679 "REMOVE": TokenType.COMMAND, 680 "RM": TokenType.COMMAND, 681 "SAMPLE": TokenType.TABLE_SAMPLE, 682 "SQL_DOUBLE": TokenType.DOUBLE, 683 "SQL_VARCHAR": TokenType.VARCHAR, 684 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 685 "TAG": TokenType.TAG, 686 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 687 "TOP": TokenType.TOP, 688 "WAREHOUSE": TokenType.WAREHOUSE, 689 "STREAMLIT": TokenType.STREAMLIT, 690 } 691 KEYWORDS.pop("/*+") 692 693 SINGLE_TOKENS = { 694 **tokens.Tokenizer.SINGLE_TOKENS, 695 "$": TokenType.PARAMETER, 696 } 697 698 VAR_SINGLE_TOKENS = {"$"} 699 700 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- IDENTIFIER_ESCAPES
- QUOTES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
702 class Generator(generator.Generator): 703 PARAMETER_TOKEN = "$" 704 MATCHED_BY_SOURCE = False 705 SINGLE_STRING_INTERVAL = True 706 JOIN_HINTS = False 707 TABLE_HINTS = False 708 QUERY_HINTS = False 709 AGGREGATE_FILTER_SUPPORTED = False 710 SUPPORTS_TABLE_COPY = False 711 COLLATE_IS_FUNC = True 712 LIMIT_ONLY_LITERALS = True 713 JSON_KEY_VALUE_PAIR_SEP = "," 714 INSERT_OVERWRITE = " OVERWRITE INTO" 715 STRUCT_DELIMITER = ("(", ")") 716 COPY_PARAMS_ARE_WRAPPED = False 717 COPY_PARAMS_EQ_REQUIRED = True 718 STAR_EXCEPT = "EXCLUDE" 719 720 TRANSFORMS = { 721 **generator.Generator.TRANSFORMS, 722 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 723 exp.ArgMax: rename_func("MAX_BY"), 724 exp.ArgMin: rename_func("MIN_BY"), 725 exp.Array: inline_array_sql, 726 exp.ArrayConcat: rename_func("ARRAY_CAT"), 727 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 728 exp.AtTimeZone: lambda self, e: self.func( 729 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 730 ), 731 exp.BitwiseXor: rename_func("BITXOR"), 732 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 733 exp.DateAdd: date_delta_sql("DATEADD"), 734 exp.DateDiff: date_delta_sql("DATEDIFF"), 735 exp.DateStrToDate: datestrtodate_sql, 736 exp.DayOfMonth: rename_func("DAYOFMONTH"), 737 exp.DayOfWeek: rename_func("DAYOFWEEK"), 738 exp.DayOfYear: rename_func("DAYOFYEAR"), 739 exp.Explode: rename_func("FLATTEN"), 740 exp.Extract: rename_func("DATE_PART"), 741 exp.FromTimeZone: lambda self, e: self.func( 742 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 743 ), 744 exp.GenerateSeries: lambda self, e: self.func( 745 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 746 ), 747 exp.GroupConcat: rename_func("LISTAGG"), 748 exp.If: if_sql(name="IFF", false_value="NULL"), 749 exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression), 750 exp.JSONExtractScalar: lambda self, e: self.func( 751 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 752 ), 753 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 754 exp.JSONPathRoot: lambda *_: "", 755 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 756 exp.LogicalOr: rename_func("BOOLOR_AGG"), 757 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 758 exp.Max: max_or_greatest, 759 exp.Min: min_or_least, 760 exp.ParseJSON: lambda self, e: self.func( 761 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 762 ), 763 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 764 exp.PercentileCont: transforms.preprocess( 765 [transforms.add_within_group_for_percentiles] 766 ), 767 exp.PercentileDisc: transforms.preprocess( 768 [transforms.add_within_group_for_percentiles] 769 ), 770 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 771 exp.RegexpILike: _regexpilike_sql, 772 exp.Rand: rename_func("RANDOM"), 773 exp.Select: transforms.preprocess( 774 [ 775 transforms.eliminate_distinct_on, 776 transforms.explode_to_unnest(), 777 transforms.eliminate_semi_and_anti_joins, 778 ] 779 ), 780 exp.SHA: rename_func("SHA1"), 781 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 782 exp.StartsWith: rename_func("STARTSWITH"), 783 exp.StrPosition: lambda self, e: self.func( 784 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 785 ), 786 exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), 787 exp.Stuff: rename_func("INSERT"), 788 exp.TimeAdd: date_delta_sql("TIMEADD"), 789 exp.TimestampDiff: lambda self, e: self.func( 790 "TIMESTAMPDIFF", e.unit, e.expression, e.this 791 ), 792 exp.TimestampTrunc: timestamptrunc_sql(), 793 exp.TimeStrToTime: timestrtotime_sql, 794 exp.TimeToStr: lambda self, e: self.func( 795 "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e) 796 ), 797 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 798 exp.ToArray: rename_func("TO_ARRAY"), 799 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 800 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 801 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 802 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 803 exp.TsOrDsToDate: lambda self, e: self.func( 804 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 805 ), 806 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 807 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 808 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 809 exp.Xor: rename_func("BOOLXOR"), 810 } 811 812 SUPPORTED_JSON_PATH_PARTS = { 813 exp.JSONPathKey, 814 exp.JSONPathRoot, 815 exp.JSONPathSubscript, 816 } 817 818 TYPE_MAPPING = { 819 **generator.Generator.TYPE_MAPPING, 820 exp.DataType.Type.NESTED: "OBJECT", 821 exp.DataType.Type.STRUCT: "OBJECT", 822 } 823 824 PROPERTIES_LOCATION = { 825 **generator.Generator.PROPERTIES_LOCATION, 826 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 827 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 828 } 829 830 UNSUPPORTED_VALUES_EXPRESSIONS = { 831 exp.Map, 832 exp.StarMap, 833 exp.Struct, 834 exp.VarMap, 835 } 836 837 def with_properties(self, properties: exp.Properties) -> str: 838 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 839 840 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 841 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 842 values_as_table = False 843 844 return super().values_sql(expression, values_as_table=values_as_table) 845 846 def datatype_sql(self, expression: exp.DataType) -> str: 847 expressions = expression.expressions 848 if ( 849 expressions 850 and expression.is_type(*exp.DataType.STRUCT_TYPES) 851 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 852 ): 853 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 854 return "OBJECT" 855 856 return super().datatype_sql(expression) 857 858 def tonumber_sql(self, expression: exp.ToNumber) -> str: 859 return self.func( 860 "TO_NUMBER", 861 expression.this, 862 expression.args.get("format"), 863 expression.args.get("precision"), 864 expression.args.get("scale"), 865 ) 866 867 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 868 milli = expression.args.get("milli") 869 if milli is not None: 870 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 871 expression.set("nano", milli_to_nano) 872 873 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 874 875 def trycast_sql(self, expression: exp.TryCast) -> str: 876 value = expression.this 877 878 if value.type is None: 879 from sqlglot.optimizer.annotate_types import annotate_types 880 881 value = annotate_types(value) 882 883 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 884 return super().trycast_sql(expression) 885 886 # TRY_CAST only works for string values in Snowflake 887 return self.cast_sql(expression) 888 889 def log_sql(self, expression: exp.Log) -> str: 890 if not expression.expression: 891 return self.func("LN", expression.this) 892 893 return super().log_sql(expression) 894 895 def unnest_sql(self, expression: exp.Unnest) -> str: 896 unnest_alias = expression.args.get("alias") 897 offset = expression.args.get("offset") 898 899 columns = [ 900 exp.to_identifier("seq"), 901 exp.to_identifier("key"), 902 exp.to_identifier("path"), 903 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 904 seq_get(unnest_alias.columns if unnest_alias else [], 0) 905 or exp.to_identifier("value"), 906 exp.to_identifier("this"), 907 ] 908 909 if unnest_alias: 910 unnest_alias.set("columns", columns) 911 else: 912 unnest_alias = exp.TableAlias(this="_u", columns=columns) 913 914 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 915 alias = self.sql(unnest_alias) 916 alias = f" AS {alias}" if alias else "" 917 return f"{explode}{alias}" 918 919 def show_sql(self, expression: exp.Show) -> str: 920 terse = "TERSE " if expression.args.get("terse") else "" 921 history = " HISTORY" if expression.args.get("history") else "" 922 like = self.sql(expression, "like") 923 like = f" LIKE {like}" if like else "" 924 925 scope = self.sql(expression, "scope") 926 scope = f" {scope}" if scope else "" 927 928 scope_kind = self.sql(expression, "scope_kind") 929 if scope_kind: 930 scope_kind = f" IN {scope_kind}" 931 932 starts_with = self.sql(expression, "starts_with") 933 if starts_with: 934 starts_with = f" STARTS WITH {starts_with}" 935 936 limit = self.sql(expression, "limit") 937 938 from_ = self.sql(expression, "from") 939 if from_: 940 from_ = f" FROM {from_}" 941 942 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 943 944 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 945 # Other dialects don't support all of the following parameters, so we need to 946 # generate default values as necessary to ensure the transpilation is correct 947 group = expression.args.get("group") 948 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 949 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 950 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 951 952 return self.func( 953 "REGEXP_SUBSTR", 954 expression.this, 955 expression.expression, 956 position, 957 occurrence, 958 parameters, 959 group, 960 ) 961 962 def except_op(self, expression: exp.Except) -> str: 963 if not expression.args.get("distinct"): 964 self.unsupported("EXCEPT with All is not supported in Snowflake") 965 return super().except_op(expression) 966 967 def intersect_op(self, expression: exp.Intersect) -> str: 968 if not expression.args.get("distinct"): 969 self.unsupported("INTERSECT with All is not supported in Snowflake") 970 return super().intersect_op(expression) 971 972 def describe_sql(self, expression: exp.Describe) -> str: 973 # Default to table if kind is unknown 974 kind_value = expression.args.get("kind") or "TABLE" 975 kind = f" {kind_value}" if kind_value else "" 976 this = f" {self.sql(expression, 'this')}" 977 expressions = self.expressions(expression, flat=True) 978 expressions = f" {expressions}" if expressions else "" 979 return f"DESCRIBE{kind}{this}{expressions}" 980 981 def generatedasidentitycolumnconstraint_sql( 982 self, expression: exp.GeneratedAsIdentityColumnConstraint 983 ) -> str: 984 start = expression.args.get("start") 985 start = f" START {start}" if start else "" 986 increment = expression.args.get("increment") 987 increment = f" INCREMENT {increment}" if increment else "" 988 return f"AUTOINCREMENT{start}{increment}" 989 990 def swaptable_sql(self, expression: exp.SwapTable) -> str: 991 this = self.sql(expression, "this") 992 return f"SWAP WITH {this}" 993 994 def cluster_sql(self, expression: exp.Cluster) -> str: 995 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 996 997 def struct_sql(self, expression: exp.Struct) -> str: 998 keys = [] 999 values = [] 1000 1001 for i, e in enumerate(expression.expressions): 1002 if isinstance(e, exp.PropertyEQ): 1003 keys.append( 1004 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1005 ) 1006 values.append(e.expression) 1007 else: 1008 keys.append(exp.Literal.string(f"_{i}")) 1009 values.append(e) 1010 1011 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1012 1013 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1014 if expression.args.get("weight") or expression.args.get("accuracy"): 1015 self.unsupported( 1016 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 1017 ) 1018 1019 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1020 1021 def alterset_sql(self, expression: exp.AlterSet) -> str: 1022 exprs = self.expressions(expression, flat=True) 1023 exprs = f" {exprs}" if exprs else "" 1024 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1025 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1026 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1027 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1028 tag = self.expressions(expression, key="tag", flat=True) 1029 tag = f" TAG {tag}" if tag else "" 1030 1031 return f"SET{exprs}{file_format}{copy_options}{tag}"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the
indentation of subqueries and filters under a
WHERE
clause. Default: 2. - normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
846 def datatype_sql(self, expression: exp.DataType) -> str: 847 expressions = expression.expressions 848 if ( 849 expressions 850 and expression.is_type(*exp.DataType.STRUCT_TYPES) 851 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 852 ): 853 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 854 return "OBJECT" 855 856 return super().datatype_sql(expression)
867 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 868 milli = expression.args.get("milli") 869 if milli is not None: 870 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 871 expression.set("nano", milli_to_nano) 872 873 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
875 def trycast_sql(self, expression: exp.TryCast) -> str: 876 value = expression.this 877 878 if value.type is None: 879 from sqlglot.optimizer.annotate_types import annotate_types 880 881 value = annotate_types(value) 882 883 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 884 return super().trycast_sql(expression) 885 886 # TRY_CAST only works for string values in Snowflake 887 return self.cast_sql(expression)
895 def unnest_sql(self, expression: exp.Unnest) -> str: 896 unnest_alias = expression.args.get("alias") 897 offset = expression.args.get("offset") 898 899 columns = [ 900 exp.to_identifier("seq"), 901 exp.to_identifier("key"), 902 exp.to_identifier("path"), 903 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 904 seq_get(unnest_alias.columns if unnest_alias else [], 0) 905 or exp.to_identifier("value"), 906 exp.to_identifier("this"), 907 ] 908 909 if unnest_alias: 910 unnest_alias.set("columns", columns) 911 else: 912 unnest_alias = exp.TableAlias(this="_u", columns=columns) 913 914 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 915 alias = self.sql(unnest_alias) 916 alias = f" AS {alias}" if alias else "" 917 return f"{explode}{alias}"
919 def show_sql(self, expression: exp.Show) -> str: 920 terse = "TERSE " if expression.args.get("terse") else "" 921 history = " HISTORY" if expression.args.get("history") else "" 922 like = self.sql(expression, "like") 923 like = f" LIKE {like}" if like else "" 924 925 scope = self.sql(expression, "scope") 926 scope = f" {scope}" if scope else "" 927 928 scope_kind = self.sql(expression, "scope_kind") 929 if scope_kind: 930 scope_kind = f" IN {scope_kind}" 931 932 starts_with = self.sql(expression, "starts_with") 933 if starts_with: 934 starts_with = f" STARTS WITH {starts_with}" 935 936 limit = self.sql(expression, "limit") 937 938 from_ = self.sql(expression, "from") 939 if from_: 940 from_ = f" FROM {from_}" 941 942 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"
944 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 945 # Other dialects don't support all of the following parameters, so we need to 946 # generate default values as necessary to ensure the transpilation is correct 947 group = expression.args.get("group") 948 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 949 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 950 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 951 952 return self.func( 953 "REGEXP_SUBSTR", 954 expression.this, 955 expression.expression, 956 position, 957 occurrence, 958 parameters, 959 group, 960 )
972 def describe_sql(self, expression: exp.Describe) -> str: 973 # Default to table if kind is unknown 974 kind_value = expression.args.get("kind") or "TABLE" 975 kind = f" {kind_value}" if kind_value else "" 976 this = f" {self.sql(expression, 'this')}" 977 expressions = self.expressions(expression, flat=True) 978 expressions = f" {expressions}" if expressions else "" 979 return f"DESCRIBE{kind}{this}{expressions}"
981 def generatedasidentitycolumnconstraint_sql( 982 self, expression: exp.GeneratedAsIdentityColumnConstraint 983 ) -> str: 984 start = expression.args.get("start") 985 start = f" START {start}" if start else "" 986 increment = expression.args.get("increment") 987 increment = f" INCREMENT {increment}" if increment else "" 988 return f"AUTOINCREMENT{start}{increment}"
997 def struct_sql(self, expression: exp.Struct) -> str: 998 keys = [] 999 values = [] 1000 1001 for i, e in enumerate(expression.expressions): 1002 if isinstance(e, exp.PropertyEQ): 1003 keys.append( 1004 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1005 ) 1006 values.append(e.expression) 1007 else: 1008 keys.append(exp.Literal.string(f"_{i}")) 1009 values.append(e) 1010 1011 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
1013 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1014 if expression.args.get("weight") or expression.args.get("accuracy"): 1015 self.unsupported( 1016 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 1017 ) 1018 1019 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))
1021 def alterset_sql(self, expression: exp.AlterSet) -> str: 1022 exprs = self.expressions(expression, flat=True) 1023 exprs = f" {exprs}" if exprs else "" 1024 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1025 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1026 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1027 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1028 tag = self.expressions(expression, key="tag", flat=True) 1029 tag = f" TAG {tag}" if tag else "" 1030 1031 return f"SET{exprs}{file_format}{copy_options}{tag}"
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_SET_OP
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- generateseries_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- length_sql
- strtodate_sql
- strtotime_sql