sqlglot.dialects.presto
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    bool_xor_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    encode_decode_sql,
    build_formatted_time,
    if_sql,
    left_to_substring_sql,
    no_ilike_sql,
    no_pivot_sql,
    no_safe_divide_sql,
    no_timestamp_sql,
    regexp_extract_sql,
    rename_func,
    right_to_substring_sql,
    sha256_sql,
    struct_extract_sql,
    str_position_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_str,
)
from sqlglot.dialects.hive import Hive
from sqlglot.dialects.mysql import MySQL
from sqlglot.helper import apply_index_offset, seq_get
from sqlglot.tokens import TokenType
from sqlglot.transforms import unqualify_columns


def _explode_to_unnest_sql(self: Presto.Generator, expression: exp.Lateral) -> str:
    """Render LATERAL EXPLODE(...) as a CROSS JOIN UNNEST(...), which is the
    Presto/Trino equivalent. Non-explode laterals fall through to the default
    lateral generator.
    """
    if isinstance(expression.this, exp.Explode):
        return self.sql(
            exp.Join(
                this=exp.Unnest(
                    expressions=[expression.this.this],
                    alias=expression.args.get("alias"),
                    # POSEXPLODE carries an index, mapped to UNNEST ... WITH ORDINALITY
                    offset=isinstance(expression.this, exp.Posexplode),
                ),
                kind="cross",
            )
        )
    return self.lateral_sql(expression)


def _initcap_sql(self: Presto.Generator, expression: exp.Initcap) -> str:
    """Emulate INITCAP via REGEXP_REPLACE with a lambda: uppercase the first
    word character of each word, lowercase the rest."""
    regex = r"(\w)(\w*)"
    return f"REGEXP_REPLACE({self.sql(expression, 'this')}, '{regex}', x -> UPPER(x[1]) || LOWER(x[2]))"


def _no_sort_array(self: Presto.Generator, expression: exp.SortArray) -> str:
    """Map SORT_ARRAY to ARRAY_SORT; descending order needs an explicit
    comparator lambda since ARRAY_SORT has no DESC flag."""
    if expression.args.get("asc") == exp.false():
        comparator = "(a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END"
    else:
        comparator = None
    return self.func("ARRAY_SORT", expression.this, comparator)


def _schema_sql(self: Presto.Generator, expression: exp.Schema) -> str:
    """Render a schema either as an ARRAY of column-name strings (when it is
    the value of a property) or as a regular schema, after pulling in column
    definitions from sibling schemas found under the parent."""
    if isinstance(expression.parent, exp.Property):
        columns = ", ".join(f"'{c.name}'" for c in expression.expressions)
        return f"ARRAY[{columns}]"

    if expression.parent:
        for schema in expression.parent.find_all(exp.Schema):
            column_defs = schema.find_all(exp.ColumnDef)
            if column_defs and isinstance(schema.parent, exp.Property):
                expression.expressions.extend(column_defs)

    return self.schema_sql(expression)


def _quantile_sql(self: Presto.Generator, expression: exp.Quantile) -> str:
    """Fall back to APPROX_PERCENTILE since Presto has no exact quantile."""
    self.unsupported("Presto does not support exact quantiles")
    return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))


def _str_to_time_sql(
    self: Presto.Generator, expression: exp.StrToDate | exp.StrToTime | exp.TsOrDsToDate
) -> str:
    """Render string-to-time conversions as DATE_PARSE(value, format)."""
    return self.func("DATE_PARSE", expression.this, self.format_time(expression))


def _ts_or_ds_to_date_sql(self: Presto.Generator, expression: exp.TsOrDsToDate) -> str:
    """Cast a timestamp-or-datestring value to DATE, going through DATE_PARSE
    only when a non-standard time format is attached."""
    time_format = self.format_time(expression)
    if time_format and time_format not in (Presto.TIME_FORMAT, Presto.DATE_FORMAT):
        return self.sql(exp.cast(_str_to_time_sql(self, expression), exp.DataType.Type.DATE))
    return self.sql(
        exp.cast(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP), exp.DataType.Type.DATE)
    )


def _ts_or_ds_add_sql(self: Presto.Generator, expression: exp.TsOrDsAdd) -> str:
    """Render TsOrDsAdd as DATE_ADD(unit, amount, value) after casting operands
    to the types Presto's DATE_ADD expects."""
    expression = ts_or_ds_add_cast(expression)
    unit = unit_to_str(expression)
    return self.func("DATE_ADD", unit, expression.expression, expression.this)


def _ts_or_ds_diff_sql(self: Presto.Generator, expression: exp.TsOrDsDiff) -> str:
    """Render TsOrDsDiff as DATE_DIFF(unit, start, end) over TIMESTAMP casts."""
    this = exp.cast(expression.this, exp.DataType.Type.TIMESTAMP)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP)
    unit = unit_to_str(expression)
    return self.func("DATE_DIFF", unit, expr, this)


def _build_approx_percentile(args: t.List) -> exp.Expression:
    """Parse APPROX_PERCENTILE's 2/3/4-argument overloads into ApproxQuantile.

    4 args: (value, weight, quantile, accuracy); 3 args: (value, quantile,
    accuracy); otherwise the generic arg-list constructor applies.
    """
    if len(args) == 4:
        return exp.ApproxQuantile(
            this=seq_get(args, 0),
            weight=seq_get(args, 1),
            quantile=seq_get(args, 2),
            accuracy=seq_get(args, 3),
        )
    if len(args) == 3:
        return exp.ApproxQuantile(
            this=seq_get(args, 0), quantile=seq_get(args, 1), accuracy=seq_get(args, 2)
        )
    return exp.ApproxQuantile.from_arg_list(args)


def _build_from_unixtime(args: t.List) -> exp.Expression:
    """Parse FROM_UNIXTIME overloads: 3 args = (unixtime, hours, minutes)
    offsets, 2 args = (unixtime, zone); otherwise generic."""
    if len(args) == 3:
        return exp.UnixToTime(
            this=seq_get(args, 0),
            hours=seq_get(args, 1),
            minutes=seq_get(args, 2),
        )
    if len(args) == 2:
        return exp.UnixToTime(this=seq_get(args, 0), zone=seq_get(args, 1))

    return exp.UnixToTime.from_arg_list(args)


def _unnest_sequence(expression: exp.Expression) -> exp.Expression:
    """Wrap a GENERATE_SERIES table source in UNNEST(SEQUENCE(...)), aliasing
    it as "_u" when the original table had an alias."""
    if isinstance(expression, exp.Table):
        if isinstance(expression.this, exp.GenerateSeries):
            unnest = exp.Unnest(expressions=[expression.this])

            if expression.alias:
                return exp.alias_(unnest, alias="_u", table=[expression.alias], copy=False)
            return unnest
    return expression


def _first_last_sql(self: Presto.Generator, expression: exp.Func) -> str:
    """
    Trino doesn't support FIRST / LAST as functions, but they're valid in the context
    of MATCH_RECOGNIZE, so we need to preserve them in that case. In all other cases
    they're converted into an ARBITRARY call.

    Reference: https://trino.io/docs/current/sql/match-recognize.html#logical-navigation-functions
    """
    if isinstance(expression.find_ancestor(exp.MatchRecognize, exp.Select), exp.MatchRecognize):
        return self.function_fallback_sql(expression)

    return rename_func("ARBITRARY")(self, expression)


def _unix_to_time_sql(self: Presto.Generator, expression: exp.UnixToTime) -> str:
    """Render UnixToTime via FROM_UNIXTIME, dividing by 10^scale to normalize
    sub-second epoch values to seconds."""
    scale = expression.args.get("scale")
    timestamp = self.sql(expression, "this")
    if scale in (None, exp.UnixToTime.SECONDS):
        return rename_func("FROM_UNIXTIME")(self, expression)

    return f"FROM_UNIXTIME(CAST({timestamp} AS DOUBLE) / POW(10, {scale}))"


def _jsonextract_sql(self: Presto.Generator, expression: exp.JSONExtract) -> str:
    """Render JSONExtract either as JSON_EXTRACT or, for VARIANT-style
    extractions when configured, as ROW dot-notation access."""
    is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)

    # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks
    # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e ROW access) in Presto/Trino
    if not expression.args.get("variant_extract") or is_json_extract:
        return self.func(
            "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
        )

    this = self.sql(expression, "this")

    # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y')` to a ROW access col.x.y
    segments = []
    for path_key in expression.expression.expressions[1:]:
        if not isinstance(path_key, exp.JSONPathKey):
            # Cannot transpile subscripts, wildcards etc to dot notation
            self.unsupported(f"Cannot transpile JSONPath segment '{path_key}' to ROW access")
            continue
        key = path_key.this
        if not exp.SAFE_IDENTIFIER_RE.match(key):
            # Quote keys that aren't valid bare identifiers
            key = f'"{key}"'
        segments.append(f".{key}")

    expr = "".join(segments)

    return f"{this}{expr}"


def _to_int(expression: exp.Expression) -> exp.Expression:
    """Ensure an expression is of integer type, annotating types on demand and
    casting to BIGINT when it isn't already an integer."""
    if not expression.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        annotate_types(expression)
    if expression.type and expression.type.this not in exp.DataType.INTEGER_TYPES:
        return exp.cast(expression, to=exp.DataType.Type.BIGINT)
    return expression


def _build_to_char(args: t.List) -> exp.TimeToStr:
    """Parse TO_CHAR into TimeToStr using Teradata's time-format mapping."""
    fmt = seq_get(args, 1)
    if isinstance(fmt, exp.Literal):
        # We uppercase this to match Teradata's format mapping keys
        fmt.set("this", fmt.this.upper())

    # We use "teradata" on purpose here, because the time formats are different in Presto.
    # See https://prestodb.io/docs/current/functions/teradata.html?highlight=to_char#to_char
    return build_formatted_time(exp.TimeToStr, "teradata")(args)


class Presto(Dialect):
    """SQL dialect definition for Presto (and the Trino base behavior)."""

    INDEX_OFFSET = 1  # arrays are 1-indexed
    NULL_ORDERING = "nulls_are_last"
    TIME_FORMAT = MySQL.TIME_FORMAT
    TIME_MAPPING = MySQL.TIME_MAPPING
    STRICT_STRING_CONCAT = True
    SUPPORTS_SEMI_ANTI_JOIN = False
    TYPED_DIVISION = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    LOG_BASE_FIRST: t.Optional[bool] = None

    # https://github.com/trinodb/trino/issues/17
    # https://github.com/trinodb/trino/issues/12289
    # https://github.com/prestodb/presto/issues/2863
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    class Tokenizer(tokens.Tokenizer):
        # Presto supports U&'...' unicode string literals
        UNICODE_STRINGS = [
            (prefix + q, q)
            for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES)
            for prefix in ("U&", "u&")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "START": TokenType.BEGIN,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "ROW": TokenType.STRUCT,
            "IPADDRESS": TokenType.IPADDRESS,
            "IPPREFIX": TokenType.IPPREFIX,
            "TDIGEST": TokenType.TDIGEST,
            "HYPERLOGLOG": TokenType.HLLSKETCH,
        }
        # Presto has no hint comments or QUALIFY clause
        KEYWORDS.pop("/*+")
        KEYWORDS.pop("QUALIFY")

    class Parser(parser.Parser):
        VALUES_FOLLOWED_BY_PAREN = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARBITRARY": exp.AnyValue.from_arg_list,
            "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list,
            "APPROX_PERCENTILE": _build_approx_percentile,
            "BITWISE_AND": binary_from_function(exp.BitwiseAnd),
            "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
            "BITWISE_OR": binary_from_function(exp.BitwiseOr),
            "BITWISE_XOR": binary_from_function(exp.BitwiseXor),
            "CARDINALITY": exp.ArraySize.from_arg_list,
            "CONTAINS": exp.ArrayContains.from_arg_list,
            # Presto's DATE_ADD / DATE_DIFF take (unit, amount, value)
            "DATE_ADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_DIFF": lambda args: exp.DateDiff(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"),
            "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"),
            "DATE_TRUNC": date_trunc_to_time,
            # ELEMENT_AT is a NULL-safe, 1-indexed subscript
            "ELEMENT_AT": lambda args: exp.Bracket(
                this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True
            ),
            "FROM_HEX": exp.Unhex.from_arg_list,
            "FROM_UNIXTIME": _build_from_unixtime,
            "FROM_UTF8": lambda args: exp.Decode(
                this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8")
            ),
            "NOW": exp.CurrentTimestamp.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2) or exp.Literal.string(""),
            ),
            "ROW": exp.Struct.from_arg_list,
            "SEQUENCE": exp.GenerateSeries.from_arg_list,
            "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
            "SPLIT_TO_MAP": exp.StrToMap.from_arg_list,
            "STRPOS": lambda args: exp.StrPosition(
                this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2)
            ),
            "TO_CHAR": _build_to_char,
            "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
            "TO_UTF8": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        # TRIM parses as an ordinary function call in Presto
        FUNCTION_PARSERS.pop("TRIM")

    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        IS_BOOL_ALLOWED = False
        TZ_TO_WITH_TIME_ZONE = True
        NVL2_SUPPORTED = False
        STRUCT_DELIMITER = ("(", ")")
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_SINGLE_ARG_CONCAT = False
        LIKE_PROPERTY_INSIDE_SCHEMA = True
        MULTI_ARG_DISTINCT = False
        SUPPORTS_TO_NUMBER = False
        HEX_FUNC = "TO_HEX"
        PARSE_JSON_NAME = "JSON_PARSE"

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.BINARY: "VARBINARY",
            exp.DataType.Type.TEXT: "VARCHAR",
            exp.DataType.Type.TIMETZ: "TIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.STRUCT: "ROW",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.DATETIME64: "TIMESTAMP",
            exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("ARBITRARY"),
            exp.ApproxDistinct: lambda self, e: self.func(
                "APPROX_DISTINCT", e.this, e.args.get("accuracy")
            ),
            exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
            exp.ArrayAny: rename_func("ANY_MATCH"),
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayContains: rename_func("CONTAINS"),
            exp.ArraySize: rename_func("CARDINALITY"),
            exp.ArrayToString: rename_func("ARRAY_JOIN"),
            exp.ArrayUniqueAgg: rename_func("SET_AGG"),
            exp.AtTimeZone: rename_func("AT_TIMEZONE"),
            exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
            exp.BitwiseLeftShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
            ),
            exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
            exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
            exp.BitwiseRightShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
            ),
            exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
            exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DateAdd: lambda self, e: self.func(
                "DATE_ADD",
                unit_to_str(e),
                _to_int(e.expression),
                e.this,
            ),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", unit_to_str(e), e.expression, e.this
            ),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
            # DateSub is rendered as DATE_ADD with a negated amount
            exp.DateSub: lambda self, e: self.func(
                "DATE_ADD",
                unit_to_str(e),
                _to_int(e.expression * -1),
                e.this,
            ),
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
            exp.DiToDate: lambda self,
            e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
            exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'",
            exp.First: _first_last_sql,
            exp.FirstValue: _first_last_sql,
            exp.FromTimeZone: lambda self,
            e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.GroupConcat: lambda self, e: self.func(
                "ARRAY_JOIN", self.func("ARRAY_AGG", e.this), e.args.get("separator")
            ),
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.Initcap: _initcap_sql,
            exp.JSONExtract: _jsonextract_sql,
            exp.Last: _first_last_sql,
            exp.LastValue: _first_last_sql,
            exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
            exp.Lateral: _explode_to_unnest_sql,
            exp.Left: left_to_substring_sql,
            exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpExtract: regexp_extract_sql,
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.Schema: _schema_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(1),
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SortArray: _no_sort_array,
            exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True),
            exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
            exp.StrToMap: rename_func("SPLIT_TO_MAP"),
            exp.StrToTime: _str_to_time_sql,
            exp.StructExtract: struct_extract_sql,
            exp.Table: transforms.preprocess([_unnest_sequence]),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: timestrtotime_sql,
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
            ),
            exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("TO_UNIXTIME"),
            exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixToStr: lambda self,
            e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self,
            e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
            exp.WithinGroup: transforms.preprocess(
                [transforms.remove_within_group_for_percentiles]
            ),
            exp.Xor: bool_xor_sql,
            exp.MD5Digest: rename_func("MD5"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
        }

        # Identifiers that must be quoted when used as names
        RESERVED_KEYWORDS = {
            "alter",
            "and",
            "as",
            "between",
            "by",
            "case",
            "cast",
            "constraint",
            "create",
            "cross",
            "current_time",
            "current_timestamp",
            "deallocate",
            "delete",
            "describe",
            "distinct",
            "drop",
            "else",
            "end",
            "escape",
            "except",
            "execute",
            "exists",
            "extract",
            "false",
            "for",
            "from",
            "full",
            "group",
            "having",
            "in",
            "inner",
            "insert",
            "intersect",
            "into",
            "is",
            "join",
            "left",
            "like",
            "natural",
            "not",
            "null",
            "on",
            "or",
            "order",
            "outer",
            "prepare",
            "right",
            "select",
            "table",
            "then",
            "true",
            "union",
            "using",
            "values",
            "when",
            "where",
            "with",
        }

        def md5_sql(self, expression: exp.MD5) -> str:
            """Render MD5 as LOWER(TO_HEX(MD5(...))), encoding text inputs to
            UTF-8 first since Presto's MD5 operates on varbinary."""
            this = expression.this

            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this)

            if this.is_type(*exp.DataType.TEXT_TYPES):
                this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

            return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))

        def strtounix_sql(self, expression: exp.StrToUnix) -> str:
            # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
            # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
            # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
            # which seems to be using the same time mapping as Hive, as per:
            # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
            value_as_text = exp.cast(expression.this, exp.DataType.Type.TEXT)
            parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))
            parse_with_tz = self.func(
                "PARSE_DATETIME",
                value_as_text,
                self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
            )
            coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
            return self.func("TO_UNIXTIME", coalesced)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            """Render NULL-safe subscripts as ELEMENT_AT, adjusting the index
            for Presto's 1-based arrays."""
            if expression.args.get("safe"):
                return self.func(
                    "ELEMENT_AT",
                    expression.this,
                    seq_get(
                        apply_index_offset(
                            expression.this,
                            expression.expressions,
                            1 - expression.args.get("offset", 0),
                        ),
                        0,
                    ),
                )
            return super().bracket_sql(expression)

        def struct_sql(self, expression: exp.Struct) -> str:
            """Render a Struct as CAST(ROW(...) AS ROW(name type, ...)) when
            every field is a typed key-value pair; otherwise as a bare ROW."""
            from sqlglot.optimizer.annotate_types import annotate_types

            expression = annotate_types(expression)
            values: t.List[str] = []
            schema: t.List[str] = []
            unknown_type = False

            for e in expression.expressions:
                if isinstance(e, exp.PropertyEQ):
                    if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                        unknown_type = True
                    else:
                        schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                    values.append(self.sql(e, "expression"))
                else:
                    values.append(self.sql(e))

            size = len(expression.expressions)

            # Only emit the typed CAST form when every field got a schema entry
            if not size or len(schema) != size:
                if unknown_type:
                    self.unsupported(
                        "Cannot convert untyped key-value definitions (try annotate_types)."
                    )
                return self.func("ROW", *values)
            return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

        def interval_sql(self, expression: exp.Interval) -> str:
            """Presto has no WEEK interval unit, so expand it to days."""
            if expression.this and expression.text("unit").upper().startswith("WEEK"):
                return f"({expression.this.name} * INTERVAL '7' DAY)"
            return super().interval_sql(expression)

        def transaction_sql(self, expression: exp.Transaction) -> str:
            """Render BEGIN-style statements as START TRANSACTION [modes]."""
            modes = expression.args.get("modes")
            modes = f" {', '.join(modes)}" if modes else ""
            return f"START TRANSACTION{modes}"

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            """Render GENERATE_SERIES as SEQUENCE, propagating a timestamp cast
            from one endpoint to the other so both have the same type."""
            start = expression.args["start"]
            end = expression.args["end"]
            step = expression.args.get("step")

            if isinstance(start, exp.Cast):
                target_type = start.to
            elif isinstance(end, exp.Cast):
                target_type = end.to
            else:
                target_type = None

            if target_type and target_type.is_type("timestamp"):
                if target_type is start.to:
                    end = exp.cast(end, target_type)
                else:
                    start = exp.cast(start, target_type)

            return self.func("SEQUENCE", start, end, step)

        def offset_limit_modifiers(
            self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
        ) -> t.List[str]:
            # OFFSET comes before LIMIT in Presto
            return [
                self.sql(expression, "offset"),
                self.sql(limit),
            ]

        def create_sql(self, expression: exp.Create) -> str:
            """
            Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
            so we need to remove them
            """
            kind = expression.args["kind"]
            schema = expression.this
            if kind == "VIEW" and schema.expressions:
                expression.this.set("expressions", None)
            return super().create_sql(expression)

        def delete_sql(self, expression: exp.Delete) -> str:
            """
            Presto only supports DELETE FROM for a single table without an alias, so we need
            to remove the unnecessary parts. If the original DELETE statement contains more
            than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
            """
            tables = expression.args.get("tables") or [expression.this]
            if len(tables) > 1:
                return super().delete_sql(expression)

            table = tables[0]
            expression.set("this", table)
            expression.set("tables", None)

            if isinstance(table, exp.Table):
                table_alias = table.args.get("alias")
                if table_alias:
                    # Drop the alias and unqualify columns that referenced it
                    table_alias.pop()
                    expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

            return super().delete_sql(expression)
227class Presto(Dialect): 228 INDEX_OFFSET = 1 229 NULL_ORDERING = "nulls_are_last" 230 TIME_FORMAT = MySQL.TIME_FORMAT 231 TIME_MAPPING = MySQL.TIME_MAPPING 232 STRICT_STRING_CONCAT = True 233 SUPPORTS_SEMI_ANTI_JOIN = False 234 TYPED_DIVISION = True 235 TABLESAMPLE_SIZE_IS_PERCENT = True 236 LOG_BASE_FIRST: t.Optional[bool] = None 237 238 # https://github.com/trinodb/trino/issues/17 239 # https://github.com/trinodb/trino/issues/12289 240 # https://github.com/prestodb/presto/issues/2863 241 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 242 243 class Tokenizer(tokens.Tokenizer): 244 UNICODE_STRINGS = [ 245 (prefix + q, q) 246 for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES) 247 for prefix in ("U&", "u&") 248 ] 249 250 KEYWORDS = { 251 **tokens.Tokenizer.KEYWORDS, 252 "START": TokenType.BEGIN, 253 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 254 "ROW": TokenType.STRUCT, 255 "IPADDRESS": TokenType.IPADDRESS, 256 "IPPREFIX": TokenType.IPPREFIX, 257 "TDIGEST": TokenType.TDIGEST, 258 "HYPERLOGLOG": TokenType.HLLSKETCH, 259 } 260 KEYWORDS.pop("/*+") 261 KEYWORDS.pop("QUALIFY") 262 263 class Parser(parser.Parser): 264 VALUES_FOLLOWED_BY_PAREN = False 265 266 FUNCTIONS = { 267 **parser.Parser.FUNCTIONS, 268 "ARBITRARY": exp.AnyValue.from_arg_list, 269 "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list, 270 "APPROX_PERCENTILE": _build_approx_percentile, 271 "BITWISE_AND": binary_from_function(exp.BitwiseAnd), 272 "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), 273 "BITWISE_OR": binary_from_function(exp.BitwiseOr), 274 "BITWISE_XOR": binary_from_function(exp.BitwiseXor), 275 "CARDINALITY": exp.ArraySize.from_arg_list, 276 "CONTAINS": exp.ArrayContains.from_arg_list, 277 "DATE_ADD": lambda args: exp.DateAdd( 278 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 279 ), 280 "DATE_DIFF": lambda args: exp.DateDiff( 281 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 282 ), 283 
"DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"), 284 "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"), 285 "DATE_TRUNC": date_trunc_to_time, 286 "ELEMENT_AT": lambda args: exp.Bracket( 287 this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True 288 ), 289 "FROM_HEX": exp.Unhex.from_arg_list, 290 "FROM_UNIXTIME": _build_from_unixtime, 291 "FROM_UTF8": lambda args: exp.Decode( 292 this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8") 293 ), 294 "NOW": exp.CurrentTimestamp.from_arg_list, 295 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 296 this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2) 297 ), 298 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 299 this=seq_get(args, 0), 300 expression=seq_get(args, 1), 301 replacement=seq_get(args, 2) or exp.Literal.string(""), 302 ), 303 "ROW": exp.Struct.from_arg_list, 304 "SEQUENCE": exp.GenerateSeries.from_arg_list, 305 "SET_AGG": exp.ArrayUniqueAgg.from_arg_list, 306 "SPLIT_TO_MAP": exp.StrToMap.from_arg_list, 307 "STRPOS": lambda args: exp.StrPosition( 308 this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2) 309 ), 310 "TO_CHAR": _build_to_char, 311 "TO_UNIXTIME": exp.TimeToUnix.from_arg_list, 312 "TO_UTF8": lambda args: exp.Encode( 313 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 314 ), 315 "MD5": exp.MD5Digest.from_arg_list, 316 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 317 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 318 } 319 320 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 321 FUNCTION_PARSERS.pop("TRIM") 322 323 class Generator(generator.Generator): 324 INTERVAL_ALLOWS_PLURAL_FORM = False 325 JOIN_HINTS = False 326 TABLE_HINTS = False 327 QUERY_HINTS = False 328 IS_BOOL_ALLOWED = False 329 TZ_TO_WITH_TIME_ZONE = True 330 NVL2_SUPPORTED = False 331 STRUCT_DELIMITER = ("(", ")") 332 
LIMIT_ONLY_LITERALS = True 333 SUPPORTS_SINGLE_ARG_CONCAT = False 334 LIKE_PROPERTY_INSIDE_SCHEMA = True 335 MULTI_ARG_DISTINCT = False 336 SUPPORTS_TO_NUMBER = False 337 HEX_FUNC = "TO_HEX" 338 PARSE_JSON_NAME = "JSON_PARSE" 339 340 PROPERTIES_LOCATION = { 341 **generator.Generator.PROPERTIES_LOCATION, 342 exp.LocationProperty: exp.Properties.Location.UNSUPPORTED, 343 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 344 } 345 346 TYPE_MAPPING = { 347 **generator.Generator.TYPE_MAPPING, 348 exp.DataType.Type.INT: "INTEGER", 349 exp.DataType.Type.FLOAT: "REAL", 350 exp.DataType.Type.BINARY: "VARBINARY", 351 exp.DataType.Type.TEXT: "VARCHAR", 352 exp.DataType.Type.TIMETZ: "TIME", 353 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 354 exp.DataType.Type.STRUCT: "ROW", 355 exp.DataType.Type.DATETIME: "TIMESTAMP", 356 exp.DataType.Type.DATETIME64: "TIMESTAMP", 357 exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG", 358 } 359 360 TRANSFORMS = { 361 **generator.Generator.TRANSFORMS, 362 exp.AnyValue: rename_func("ARBITRARY"), 363 exp.ApproxDistinct: lambda self, e: self.func( 364 "APPROX_DISTINCT", e.this, e.args.get("accuracy") 365 ), 366 exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"), 367 exp.ArgMax: rename_func("MAX_BY"), 368 exp.ArgMin: rename_func("MIN_BY"), 369 exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]", 370 exp.ArrayAny: rename_func("ANY_MATCH"), 371 exp.ArrayConcat: rename_func("CONCAT"), 372 exp.ArrayContains: rename_func("CONTAINS"), 373 exp.ArraySize: rename_func("CARDINALITY"), 374 exp.ArrayToString: rename_func("ARRAY_JOIN"), 375 exp.ArrayUniqueAgg: rename_func("SET_AGG"), 376 exp.AtTimeZone: rename_func("AT_TIMEZONE"), 377 exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression), 378 exp.BitwiseLeftShift: lambda self, e: self.func( 379 "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression 380 ), 381 exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this), 382 exp.BitwiseOr: lambda self, e: 
self.func("BITWISE_OR", e.this, e.expression), 383 exp.BitwiseRightShift: lambda self, e: self.func( 384 "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression 385 ), 386 exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression), 387 exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]), 388 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 389 exp.DateAdd: lambda self, e: self.func( 390 "DATE_ADD", 391 unit_to_str(e), 392 _to_int(e.expression), 393 e.this, 394 ), 395 exp.DateDiff: lambda self, e: self.func( 396 "DATE_DIFF", unit_to_str(e), e.expression, e.this 397 ), 398 exp.DateStrToDate: datestrtodate_sql, 399 exp.DateToDi: lambda self, 400 e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)", 401 exp.DateSub: lambda self, e: self.func( 402 "DATE_ADD", 403 unit_to_str(e), 404 _to_int(e.expression * -1), 405 e.this, 406 ), 407 exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"), 408 exp.DiToDate: lambda self, 409 e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)", 410 exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"), 411 exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'", 412 exp.First: _first_last_sql, 413 exp.FirstValue: _first_last_sql, 414 exp.FromTimeZone: lambda self, 415 e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'", 416 exp.Group: transforms.preprocess([transforms.unalias_group]), 417 exp.GroupConcat: lambda self, e: self.func( 418 "ARRAY_JOIN", self.func("ARRAY_AGG", e.this), e.args.get("separator") 419 ), 420 exp.If: if_sql(), 421 exp.ILike: no_ilike_sql, 422 exp.Initcap: _initcap_sql, 423 exp.JSONExtract: _jsonextract_sql, 424 exp.Last: _first_last_sql, 425 exp.LastValue: _first_last_sql, 426 exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this), 427 exp.Lateral: _explode_to_unnest_sql, 428 exp.Left: left_to_substring_sql, 429 exp.Levenshtein: 
rename_func("LEVENSHTEIN_DISTANCE"), 430 exp.LogicalAnd: rename_func("BOOL_AND"), 431 exp.LogicalOr: rename_func("BOOL_OR"), 432 exp.Pivot: no_pivot_sql, 433 exp.Quantile: _quantile_sql, 434 exp.RegexpExtract: regexp_extract_sql, 435 exp.Right: right_to_substring_sql, 436 exp.SafeDivide: no_safe_divide_sql, 437 exp.Schema: _schema_sql, 438 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 439 exp.Select: transforms.preprocess( 440 [ 441 transforms.eliminate_qualify, 442 transforms.eliminate_distinct_on, 443 transforms.explode_to_unnest(1), 444 transforms.eliminate_semi_and_anti_joins, 445 ] 446 ), 447 exp.SortArray: _no_sort_array, 448 exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True), 449 exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)", 450 exp.StrToMap: rename_func("SPLIT_TO_MAP"), 451 exp.StrToTime: _str_to_time_sql, 452 exp.StructExtract: struct_extract_sql, 453 exp.Table: transforms.preprocess([_unnest_sequence]), 454 exp.Timestamp: no_timestamp_sql, 455 exp.TimestampTrunc: timestamptrunc_sql(), 456 exp.TimeStrToDate: timestrtotime_sql, 457 exp.TimeStrToTime: timestrtotime_sql, 458 exp.TimeStrToUnix: lambda self, e: self.func( 459 "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT) 460 ), 461 exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 462 exp.TimeToUnix: rename_func("TO_UNIXTIME"), 463 exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 464 exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]), 465 exp.TsOrDiToDi: lambda self, 466 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)", 467 exp.TsOrDsAdd: _ts_or_ds_add_sql, 468 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 469 exp.TsOrDsToDate: _ts_or_ds_to_date_sql, 470 exp.Unhex: rename_func("FROM_HEX"), 471 exp.UnixToStr: lambda self, 472 e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), 
{self.format_time(e)})", 473 exp.UnixToTime: _unix_to_time_sql, 474 exp.UnixToTimeStr: lambda self, 475 e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)", 476 exp.VariancePop: rename_func("VAR_POP"), 477 exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]), 478 exp.WithinGroup: transforms.preprocess( 479 [transforms.remove_within_group_for_percentiles] 480 ), 481 exp.Xor: bool_xor_sql, 482 exp.MD5Digest: rename_func("MD5"), 483 exp.SHA: rename_func("SHA1"), 484 exp.SHA2: sha256_sql, 485 } 486 487 RESERVED_KEYWORDS = { 488 "alter", 489 "and", 490 "as", 491 "between", 492 "by", 493 "case", 494 "cast", 495 "constraint", 496 "create", 497 "cross", 498 "current_time", 499 "current_timestamp", 500 "deallocate", 501 "delete", 502 "describe", 503 "distinct", 504 "drop", 505 "else", 506 "end", 507 "escape", 508 "except", 509 "execute", 510 "exists", 511 "extract", 512 "false", 513 "for", 514 "from", 515 "full", 516 "group", 517 "having", 518 "in", 519 "inner", 520 "insert", 521 "intersect", 522 "into", 523 "is", 524 "join", 525 "left", 526 "like", 527 "natural", 528 "not", 529 "null", 530 "on", 531 "or", 532 "order", 533 "outer", 534 "prepare", 535 "right", 536 "select", 537 "table", 538 "then", 539 "true", 540 "union", 541 "using", 542 "values", 543 "when", 544 "where", 545 "with", 546 } 547 548 def md5_sql(self, expression: exp.MD5) -> str: 549 this = expression.this 550 551 if not this.type: 552 from sqlglot.optimizer.annotate_types import annotate_types 553 554 this = annotate_types(this) 555 556 if this.is_type(*exp.DataType.TEXT_TYPES): 557 this = exp.Encode(this=this, charset=exp.Literal.string("utf-8")) 558 559 return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this)))) 560 561 def strtounix_sql(self, expression: exp.StrToUnix) -> str: 562 # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one. 
563 # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a 564 # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback, 565 # which seems to be using the same time mapping as Hive, as per: 566 # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html 567 value_as_text = exp.cast(expression.this, exp.DataType.Type.TEXT) 568 parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression)) 569 parse_with_tz = self.func( 570 "PARSE_DATETIME", 571 value_as_text, 572 self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE), 573 ) 574 coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz) 575 return self.func("TO_UNIXTIME", coalesced) 576 577 def bracket_sql(self, expression: exp.Bracket) -> str: 578 if expression.args.get("safe"): 579 return self.func( 580 "ELEMENT_AT", 581 expression.this, 582 seq_get( 583 apply_index_offset( 584 expression.this, 585 expression.expressions, 586 1 - expression.args.get("offset", 0), 587 ), 588 0, 589 ), 590 ) 591 return super().bracket_sql(expression) 592 593 def struct_sql(self, expression: exp.Struct) -> str: 594 from sqlglot.optimizer.annotate_types import annotate_types 595 596 expression = annotate_types(expression) 597 values: t.List[str] = [] 598 schema: t.List[str] = [] 599 unknown_type = False 600 601 for e in expression.expressions: 602 if isinstance(e, exp.PropertyEQ): 603 if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN): 604 unknown_type = True 605 else: 606 schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}") 607 values.append(self.sql(e, "expression")) 608 else: 609 values.append(self.sql(e)) 610 611 size = len(expression.expressions) 612 613 if not size or len(schema) != size: 614 if unknown_type: 615 self.unsupported( 616 "Cannot convert untyped key-value definitions (try annotate_types)." 
617 ) 618 return self.func("ROW", *values) 619 return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))" 620 621 def interval_sql(self, expression: exp.Interval) -> str: 622 if expression.this and expression.text("unit").upper().startswith("WEEK"): 623 return f"({expression.this.name} * INTERVAL '7' DAY)" 624 return super().interval_sql(expression) 625 626 def transaction_sql(self, expression: exp.Transaction) -> str: 627 modes = expression.args.get("modes") 628 modes = f" {', '.join(modes)}" if modes else "" 629 return f"START TRANSACTION{modes}" 630 631 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 632 start = expression.args["start"] 633 end = expression.args["end"] 634 step = expression.args.get("step") 635 636 if isinstance(start, exp.Cast): 637 target_type = start.to 638 elif isinstance(end, exp.Cast): 639 target_type = end.to 640 else: 641 target_type = None 642 643 if target_type and target_type.is_type("timestamp"): 644 if target_type is start.to: 645 end = exp.cast(end, target_type) 646 else: 647 start = exp.cast(start, target_type) 648 649 return self.func("SEQUENCE", start, end, step) 650 651 def offset_limit_modifiers( 652 self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit] 653 ) -> t.List[str]: 654 return [ 655 self.sql(expression, "offset"), 656 self.sql(limit), 657 ] 658 659 def create_sql(self, expression: exp.Create) -> str: 660 """ 661 Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression), 662 so we need to remove them 663 """ 664 kind = expression.args["kind"] 665 schema = expression.this 666 if kind == "VIEW" and schema.expressions: 667 expression.this.set("expressions", None) 668 return super().create_sql(expression) 669 670 def delete_sql(self, expression: exp.Delete) -> str: 671 """ 672 Presto only supports DELETE FROM for a single table without an alias, so we need 673 to remove the unnecessary parts. 
If the original DELETE statement contains more 674 than one table to be deleted, we can't safely map it 1-1 to a Presto statement. 675 """ 676 tables = expression.args.get("tables") or [expression.this] 677 if len(tables) > 1: 678 return super().delete_sql(expression) 679 680 table = tables[0] 681 expression.set("this", table) 682 expression.set("tables", None) 683 684 if isinstance(table, exp.Table): 685 table_alias = table.args.get("alias") 686 if table_alias: 687 table_alias.pop() 688 expression = t.cast(exp.Delete, expression.transform(unqualify_columns)) 689 690 return super().delete_sql(expression)
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
Associates this dialect's time formats with their equivalent Python strftime
formats.
Whether the behavior of a / b depends on the types of a and b.
False means a / b is always float division.
True means a / b is integer division if both a and b are integers.
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG).
Specifies the strategy according to which identifiers should be normalized.
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- SUPPORTS_USER_DEFINED_TYPES
- COPY_PARAMS_ARE_CSV
- NORMALIZE_FUNCTIONS
- SAFE_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- UNESCAPED_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- DATE_PART_MAPPING
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer):
    # Presto accepts Unicode string literals written with a U&'...' prefix;
    # register both casings of the prefix for every quote pair known to the
    # base tokenizer.
    UNICODE_STRINGS = [
        (prefix + q, q)
        for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES)
        for prefix in ("U&", "u&")
    ]

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "START": TokenType.BEGIN,  # START TRANSACTION opens a transaction block
        "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
        "ROW": TokenType.STRUCT,  # Presto's ROW type maps to the generic STRUCT
        "IPADDRESS": TokenType.IPADDRESS,
        "IPPREFIX": TokenType.IPPREFIX,
        "TDIGEST": TokenType.TDIGEST,
        "HYPERLOGLOG": TokenType.HLLSKETCH,  # HLL type maps to the generic HLLSKETCH
    }
    # Presto has no /*+ ... */ hint comments and no QUALIFY clause.
    KEYWORDS.pop("/*+")
    KEYWORDS.pop("QUALIFY")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- IDENTIFIERS
- IDENTIFIER_ESCAPES
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    # VALUES is not necessarily followed by a parenthesized tuple in Presto.
    VALUES_FOLLOWED_BY_PAREN = False

    # Presto function names mapped onto sqlglot AST builders.
    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ARBITRARY": exp.AnyValue.from_arg_list,
        "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list,
        "APPROX_PERCENTILE": _build_approx_percentile,
        "BITWISE_AND": binary_from_function(exp.BitwiseAnd),
        "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
        "BITWISE_OR": binary_from_function(exp.BitwiseOr),
        "BITWISE_XOR": binary_from_function(exp.BitwiseXor),
        "CARDINALITY": exp.ArraySize.from_arg_list,
        "CONTAINS": exp.ArrayContains.from_arg_list,
        # Presto's DATE_ADD/DATE_DIFF take (unit, value, date) while the AST
        # stores the date in `this` — hence the reversed seq_get indices.
        "DATE_ADD": lambda args: exp.DateAdd(
            this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
        ),
        "DATE_DIFF": lambda args: exp.DateDiff(
            this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
        ),
        "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"),
        "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"),
        "DATE_TRUNC": date_trunc_to_time,
        # ELEMENT_AT(arr, i) is parsed as a "safe" 1-based subscript (arr[i]
        # that doesn't fail on out-of-bounds access).
        "ELEMENT_AT": lambda args: exp.Bracket(
            this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True
        ),
        "FROM_HEX": exp.Unhex.from_arg_list,
        "FROM_UNIXTIME": _build_from_unixtime,
        "FROM_UTF8": lambda args: exp.Decode(
            this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8")
        ),
        "NOW": exp.CurrentTimestamp.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
        ),
        # A missing replacement argument defaults to the empty string.
        "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            replacement=seq_get(args, 2) or exp.Literal.string(""),
        ),
        "ROW": exp.Struct.from_arg_list,
        "SEQUENCE": exp.GenerateSeries.from_arg_list,
        "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
        "SPLIT_TO_MAP": exp.StrToMap.from_arg_list,
        "STRPOS": lambda args: exp.StrPosition(
            this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2)
        ),
        "TO_CHAR": _build_to_char,
        "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
        "TO_UTF8": lambda args: exp.Encode(
            this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
        ),
        "MD5": exp.MD5Digest.from_arg_list,
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
    }

    # TRIM is parsed as a plain function call rather than with the special
    # TRIM([LEADING | TRAILING | BOTH] x FROM y) grammar.
    FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
    FUNCTION_PARSERS.pop("TRIM")
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    # Presto rejects plural interval units (e.g. DAYS), hint comments, and
    # the IS TRUE/IS FALSE forms.
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    IS_BOOL_ALLOWED = False
    # Timezone-aware types render as "... WITH TIME ZONE".
    TZ_TO_WITH_TIME_ZONE = True
    NVL2_SUPPORTED = False
    # ROW types use parentheses, e.g. ROW(a INTEGER, b VARCHAR).
    STRUCT_DELIMITER = ("(", ")")
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_SINGLE_ARG_CONCAT = False
    LIKE_PROPERTY_INSIDE_SCHEMA = True
    MULTI_ARG_DISTINCT = False
    SUPPORTS_TO_NUMBER = False
    HEX_FUNC = "TO_HEX"
    PARSE_JSON_NAME = "JSON_PARSE"

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # Presto type names for sqlglot's generic data types.
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.INT: "INTEGER",
        exp.DataType.Type.FLOAT: "REAL",
        exp.DataType.Type.BINARY: "VARBINARY",
        exp.DataType.Type.TEXT: "VARCHAR",
        exp.DataType.Type.TIMETZ: "TIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.STRUCT: "ROW",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.DATETIME64: "TIMESTAMP",
        exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
    }

    # Per-expression SQL generation overrides.
    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("ARBITRARY"),
        exp.ApproxDistinct: lambda self, e: self.func(
            "APPROX_DISTINCT", e.this, e.args.get("accuracy")
        ),
        exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
        exp.ArgMax: rename_func("MAX_BY"),
        exp.ArgMin: rename_func("MIN_BY"),
        exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
        exp.ArrayAny: rename_func("ANY_MATCH"),
        exp.ArrayConcat: rename_func("CONCAT"),
        exp.ArrayContains: rename_func("CONTAINS"),
        exp.ArraySize: rename_func("CARDINALITY"),
        exp.ArrayToString: rename_func("ARRAY_JOIN"),
        exp.ArrayUniqueAgg: rename_func("SET_AGG"),
        exp.AtTimeZone: rename_func("AT_TIMEZONE"),
        exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
        exp.BitwiseLeftShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
        ),
        exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
        exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
        exp.BitwiseRightShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
        ),
        exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
        exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
        # DATE_ADD/DATE_DIFF take (unit, value, date) in Presto.
        exp.DateAdd: lambda self, e: self.func(
            "DATE_ADD",
            unit_to_str(e),
            _to_int(e.expression),
            e.this,
        ),
        exp.DateDiff: lambda self, e: self.func(
            "DATE_DIFF", unit_to_str(e), e.expression, e.this
        ),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateToDi: lambda self,
        e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
        # Subtraction is rendered as DATE_ADD with a negated amount.
        exp.DateSub: lambda self, e: self.func(
            "DATE_ADD",
            unit_to_str(e),
            _to_int(e.expression * -1),
            e.this,
        ),
        exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
        exp.DiToDate: lambda self,
        e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
        exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
        exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'",
        exp.First: _first_last_sql,
        exp.FirstValue: _first_last_sql,
        exp.FromTimeZone: lambda self,
        e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        # GROUP_CONCAT is emulated with ARRAY_JOIN over ARRAY_AGG.
        exp.GroupConcat: lambda self, e: self.func(
            "ARRAY_JOIN", self.func("ARRAY_AGG", e.this), e.args.get("separator")
        ),
        exp.If: if_sql(),
        exp.ILike: no_ilike_sql,
        exp.Initcap: _initcap_sql,
        exp.JSONExtract: _jsonextract_sql,
        exp.Last: _first_last_sql,
        exp.LastValue: _first_last_sql,
        exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
        exp.Lateral: _explode_to_unnest_sql,
        exp.Left: left_to_substring_sql,
        exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"),
        exp.LogicalAnd: rename_func("BOOL_AND"),
        exp.LogicalOr: rename_func("BOOL_OR"),
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpExtract: regexp_extract_sql,
        exp.Right: right_to_substring_sql,
        exp.SafeDivide: no_safe_divide_sql,
        exp.Schema: _schema_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        # Rewrites applied to whole SELECT statements before generation.
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.explode_to_unnest(1),
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SortArray: _no_sort_array,
        exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True),
        exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
        exp.StrToMap: rename_func("SPLIT_TO_MAP"),
        exp.StrToTime: _str_to_time_sql,
        exp.StructExtract: struct_extract_sql,
        exp.Table: transforms.preprocess([_unnest_sequence]),
        exp.Timestamp: no_timestamp_sql,
        exp.TimestampTrunc: timestamptrunc_sql(),
        exp.TimeStrToDate: timestrtotime_sql,
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: lambda self, e: self.func(
            "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
        ),
        exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TimeToUnix: rename_func("TO_UNIXTIME"),
        exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.TsOrDiToDi: lambda self,
        e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixToStr: lambda self,
        e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
        exp.UnixToTime: _unix_to_time_sql,
        exp.UnixToTimeStr: lambda self,
        e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
        exp.VariancePop: rename_func("VAR_POP"),
        exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
        exp.WithinGroup: transforms.preprocess(
            [transforms.remove_within_group_for_percentiles]
        ),
        exp.Xor: bool_xor_sql,
        exp.MD5Digest: rename_func("MD5"),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
    }

    # Identifiers matching these words must be quoted when generated.
    RESERVED_KEYWORDS = {
        "alter",
        "and",
        "as",
        "between",
        "by",
        "case",
        "cast",
        "constraint",
        "create",
        "cross",
        "current_time",
        "current_timestamp",
        "deallocate",
        "delete",
        "describe",
        "distinct",
        "drop",
        "else",
        "end",
        "escape",
        "except",
        "execute",
        "exists",
        "extract",
        "false",
        "for",
        "from",
        "full",
        "group",
        "having",
        "in",
        "inner",
        "insert",
        "intersect",
        "into",
        "is",
        "join",
        "left",
        "like",
        "natural",
        "not",
        "null",
        "on",
        "or",
        "order",
        "outer",
        "prepare",
        "right",
        "select",
        "table",
        "then",
        "true",
        "union",
        "using",
        "values",
        "when",
        "where",
        "with",
    }

    def md5_sql(self, expression: exp.MD5) -> str:
        # Renders MD5 as LOWER(TO_HEX(MD5(...))); textual arguments are first
        # routed through TO_UTF8 (exp.Encode), so the hash is computed over
        # the string's UTF-8 bytes.
        this = expression.this

        if not this.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this)

        if this.is_type(*exp.DataType.TEXT_TYPES):
            this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

        return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))

    def strtounix_sql(self, expression: exp.StrToUnix) -> str:
        # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
        # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
        # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
        # which seems to be using the same time mapping as Hive, as per:
        # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
        value_as_text = exp.cast(expression.this, exp.DataType.Type.TEXT)
        parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))
        parse_with_tz = self.func(
            "PARSE_DATETIME",
            value_as_text,
            self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
        )
        coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
        return self.func("TO_UNIXTIME", coalesced)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        # "Safe" subscripts become ELEMENT_AT, which takes a 1-based index;
        # `1 - offset` normalizes whatever offset the bracket was parsed with.
        if expression.args.get("safe"):
            return self.func(
                "ELEMENT_AT",
                expression.this,
                seq_get(
                    apply_index_offset(
                        expression.this,
                        expression.expressions,
                        1 - expression.args.get("offset", 0),
                    ),
                    0,
                ),
            )
        return super().bracket_sql(expression)

    def struct_sql(self, expression: exp.Struct) -> str:
        # Named struct fields only survive as a typed cast:
        # CAST(ROW(v1, ...) AS ROW(name1 type1, ...)). If any field type is
        # unknown, fall back to an unnamed ROW(...).
        from sqlglot.optimizer.annotate_types import annotate_types

        expression = annotate_types(expression)
        values: t.List[str] = []
        schema: t.List[str] = []
        unknown_type = False

        for e in expression.expressions:
            if isinstance(e, exp.PropertyEQ):
                if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                    unknown_type = True
                else:
                    schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                values.append(self.sql(e, "expression"))
            else:
                values.append(self.sql(e))

        size = len(expression.expressions)

        # Only emit the cast form when every field produced a (name, type) pair.
        if not size or len(schema) != size:
            if unknown_type:
                self.unsupported(
                    "Cannot convert untyped key-value definitions (try annotate_types)."
                )
            return self.func("ROW", *values)
        return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

    def interval_sql(self, expression: exp.Interval) -> str:
        # There is no WEEK interval unit, so N WEEK is rewritten as
        # (N * INTERVAL '7' DAY).
        if expression.this and expression.text("unit").upper().startswith("WEEK"):
            return f"({expression.this.name} * INTERVAL '7' DAY)"
        return super().interval_sql(expression)

    def transaction_sql(self, expression: exp.Transaction) -> str:
        # Transactions open with START TRANSACTION, optionally followed by
        # a comma-separated list of modes.
        modes = expression.args.get("modes")
        modes = f" {', '.join(modes)}" if modes else ""
        return f"START TRANSACTION{modes}"

    def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
        # GENERATE_SERIES maps to SEQUENCE(start, end[, step]). When one
        # endpoint is explicitly cast to TIMESTAMP, the other endpoint is
        # cast to the same type so both sides agree.
        start = expression.args["start"]
        end = expression.args["end"]
        step = expression.args.get("step")

        if isinstance(start, exp.Cast):
            target_type = start.to
        elif isinstance(end, exp.Cast):
            target_type = end.to
        else:
            target_type = None

        if target_type and target_type.is_type("timestamp"):
            if target_type is start.to:
                end = exp.cast(end, target_type)
            else:
                start = exp.cast(start, target_type)

        return self.func("SEQUENCE", start, end, step)

    def offset_limit_modifiers(
        self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
    ) -> t.List[str]:
        # Emit OFFSET before LIMIT/FETCH.
        return [
            self.sql(expression, "offset"),
            self.sql(limit),
        ]

    def create_sql(self, expression: exp.Create) -> str:
        """
        Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
        so we need to remove them
        """
        kind = expression.args["kind"]
        schema = expression.this
        if kind == "VIEW" and schema.expressions:
            expression.this.set("expressions", None)
        return super().create_sql(expression)

    def delete_sql(self, expression: exp.Delete) -> str:
        """
        Presto only supports DELETE FROM for a single table without an alias, so we need
        to remove the unnecessary parts. If the original DELETE statement contains more
        than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
        """
        tables = expression.args.get("tables") or [expression.this]
        if len(tables) > 1:
            return super().delete_sql(expression)

        table = tables[0]
        expression.set("this", table)
        expression.set("tables", None)

        if isinstance(table, exp.Table):
            # Drop the table alias and unqualify columns that referenced it.
            table_alias = table.args.get("alias")
            if table_alias:
                table_alias.pop()
                expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

        return super().delete_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
def md5_sql(self, expression: exp.MD5) -> str:
    """Render MD5 as a lowercase hex string: LOWER(TO_HEX(MD5(...)))."""
    arg = expression.this

    # Annotate lazily so we can tell whether the argument is textual.
    if not arg.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        arg = annotate_types(arg)

    # Text arguments are hashed over their UTF-8 bytes (exp.Encode renders
    # as TO_UTF8 in this dialect).
    if arg.is_type(*exp.DataType.TEXT_TYPES):
        arg = exp.Encode(this=arg, charset=exp.Literal.string("utf-8"))

    digest = self.func("MD5", self.sql(arg))
    return self.func("LOWER", self.func("TO_HEX", digest))
def strtounix_sql(self, expression: exp.StrToUnix) -> str:
    """Render StrToUnix as TO_UNIXTIME over a parsed TIMESTAMP.

    TO_UNIXTIME requires a TIMESTAMP argument. DATE_PARSE produces one but
    can fail when the input carries a timezone, so that attempt is wrapped
    in TRY, with PARSE_DATETIME (Joda-style patterns, same mapping as
    Hive) as the fallback:
    https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
    """
    text_value = exp.cast(expression.this, exp.DataType.Type.TEXT)

    primary = self.func(
        "TRY", self.func("DATE_PARSE", text_value, self.format_time(expression))
    )
    fallback = self.func(
        "PARSE_DATETIME",
        text_value,
        self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
    )

    return self.func("TO_UNIXTIME", self.func("COALESCE", primary, fallback))
def bracket_sql(self, expression: exp.Bracket) -> str:
    """Render "safe" bracket access as ELEMENT_AT; defer everything else to the default."""
    if not expression.args.get("safe"):
        return super().bracket_sql(expression)

    # ELEMENT_AT is 1-based, so shift the subscript by the dialect's index offset.
    index = seq_get(
        apply_index_offset(
            expression.this,
            expression.expressions,
            1 - expression.args.get("offset", 0),
        ),
        0,
    )
    return self.func("ELEMENT_AT", expression.this, index)
def struct_sql(self, expression: exp.Struct) -> str:
    """Render a Struct as CAST(ROW(...) AS ROW(...)) when every field is typed.

    Falls back to a bare ROW(...) call (and warns) when any key-value pair
    lacks type information.
    """
    from sqlglot.optimizer.annotate_types import annotate_types

    expression = annotate_types(expression)
    values: t.List[str] = []
    schema: t.List[str] = []
    unknown_type = False

    for field in expression.expressions:
        if not isinstance(field, exp.PropertyEQ):
            values.append(self.sql(field))
            continue

        if field.type and field.type.is_type(exp.DataType.Type.UNKNOWN):
            unknown_type = True
        else:
            schema.append(f"{self.sql(field, 'this')} {self.sql(field.type)}")
        values.append(self.sql(field, "expression"))

    size = len(expression.expressions)

    # The typed CAST form is only valid if every single field contributed a schema entry.
    if not size or len(schema) != size:
        if unknown_type:
            self.unsupported(
                "Cannot convert untyped key-value definitions (try annotate_types)."
            )
        return self.func("ROW", *values)
    return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"
def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
    """Map GenerateSeries onto SEQUENCE, propagating a TIMESTAMP cast across endpoints."""
    start = expression.args["start"]
    end = expression.args["end"]
    step = expression.args.get("step")

    # Pick up an explicit cast from either endpoint, preferring the start.
    target_type = None
    if isinstance(start, exp.Cast):
        target_type = start.to
    elif isinstance(end, exp.Cast):
        target_type = end.to

    # SEQUENCE needs both bounds to be timestamps, so cast the bare endpoint too.
    if target_type and target_type.is_type("timestamp"):
        if target_type is start.to:
            end = exp.cast(end, target_type)
        else:
            start = exp.cast(start, target_type)

    return self.func("SEQUENCE", start, end, step)
def create_sql(self, expression: exp.Create) -> str:
    """
    Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
    so we need to remove them
    """
    schema = expression.this
    if expression.args["kind"] == "VIEW" and schema.expressions:
        schema.set("expressions", None)
    return super().create_sql(expression)
Presto doesn't support CREATE VIEW with expressions (e.g. in `CREATE VIEW x (cola)`, `(cola)` is the expression), so we need to remove them.
def delete_sql(self, expression: exp.Delete) -> str:
    """
    Presto only supports DELETE FROM for a single table without an alias, so we need
    to remove the unnecessary parts. If the original DELETE statement contains more
    than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
    """
    tables = expression.args.get("tables") or [expression.this]
    if len(tables) > 1:
        return super().delete_sql(expression)

    target = tables[0]
    expression.set("this", target)
    expression.set("tables", None)

    if isinstance(target, exp.Table):
        alias = target.args.get("alias")
        if alias:
            alias.pop()
        # Columns qualified by the (now removed) alias must be unqualified too.
        expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

    return super().delete_sql(expression)
Presto only supports DELETE FROM for a single table without an alias, so we need to remove the unnecessary parts. If the original DELETE statement contains more than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_SET_OP
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SUPPORTED_JSON_PATH_PARTS
- CAN_IMPLEMENT_ARRAY_ANY
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- TRY_SUPPORTED
- SUPPORTS_UESCAPE
- STAR_EXCEPT
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- length_sql
- rand_sql
- strtodate_sql
- strtotime_sql
- changes_sql