sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
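
# Illustrative example (not part of the module): these builders control the AST
# shape of parsed function calls. For instance, build_logarithm normalizes LOG's
# base/expression order per dialect. Assuming the default dialect, which keeps
# the base first:
#
#   import sqlglot
#   from sqlglot import exp
#
#   node = sqlglot.parse_one("LOG(2, x)")
#   assert isinstance(node, exp.Log) and node.this.name == "2"  # base parsed into "this"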

def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "MOD": build_mod,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": build_var_map,
        "LOWER": build_lower,
        "UPPER": build_upper,
        "HEX": build_hex,
        "TO_HEX": build_hex,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }
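
    # Illustrative sketch (not part of sqlglot): dialect parsers typically extend
    # FUNCTIONS by subclassing, mapping an uppercase function name to a builder
    # over the parsed argument list, e.g.
    #
    #   class Parser(parser.Parser):
    #       FUNCTIONS = {
    #           **parser.Parser.FUNCTIONS,
    #           "LEN": lambda args: exp.Length(this=seq_get(args, 0)),
    #       }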

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }
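
    # Illustrative example (not part of the module): TYPE_TOKENS determines
    # which keywords _parse_types accepts, so type names become exp.DataType
    # nodes. Assuming the default dialect:
    #
    #   import sqlglot
    #
    #   cast = sqlglot.parse_one("CAST(x AS DECIMAL(10, 2))")
    #   assert cast.to.is_type("decimal")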

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.COLON_EQ: exp.PropertyEQ,
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }
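
    # Illustrative example (not part of the module): because keyword tokens in
    # ID_VAR_TOKENS / ALIAS_TOKENS may double as identifiers, queries like the
    # following still parse:
    #
    #   import sqlglot
    #
    #   sqlglot.parse_one("SELECT 1 AS offset")  # OFFSET used as an alias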

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }
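
    # Illustrative example (not part of the module): COLUMN_OPERATORS maps
    # postfix operators on columns to AST builders, e.g. '::' to a cast and
    # '->' to a JSON extraction (in dialects that support them):
    #
    #   import sqlglot
    #
    #   sqlglot.parse_one("a::INT", read="postgres")        # exp.Cast
    #   sqlglot.parse_one("doc -> 'key'", read="postgres")  # exp.JSONExtract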

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(
            exp.BitString, this=token.text
        ),
        TokenType.BYTE_STRING: lambda self, token: self.expression(
            exp.ByteString, this=token.text
        ),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star,
            **{
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
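
    # Illustrative example (not part of the module): RANGE_PARSERS handles
    # predicates that follow an expression, such as BETWEEN, IN and IS:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   assert isinstance(sqlglot.parse_one("x BETWEEN 1 AND 2"), exp.Between)
    #   assert isinstance(sqlglot.parse_one("x IN (1, 2)"), exp.In)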

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
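
    # Illustrative example (not part of the module): PROPERTY_PARSERS turns DDL
    # properties into exp.Property subclasses attached to the exp.Create node,
    # e.g. the ENGINE assignment below becomes an exp.EngineProperty:
    #
    #   import sqlglot
    #
    #   sqlglot.parse_one("CREATE TABLE t (a INT) ENGINE=MergeTree", read="clickhouse")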

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_conjunction),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "IF": lambda self: self._parse_if(),
        "NEXT": lambda self: self._parse_next_value_for(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "PREDICT": lambda self: self._parse_predict(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    }
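
    # Illustrative example (not part of the module): FUNCTION_PARSERS
    # special-cases functions whose argument syntax isn't a plain CSV list,
    # e.g. CAST(x AS INT) or EXTRACT(YEAR FROM d):
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   assert isinstance(sqlglot.parse_one("EXTRACT(YEAR FROM d)"), exp.Extract)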
lambda self: ("limit", self._parse_limit()), 1011 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1012 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1013 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1014 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1015 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1016 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1017 TokenType.CLUSTER_BY: lambda self: ( 1018 "cluster", 1019 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1020 ), 1021 TokenType.DISTRIBUTE_BY: lambda self: ( 1022 "distribute", 1023 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1024 ), 1025 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1026 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1027 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1028 } 1029 1030 SET_PARSERS = { 1031 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1032 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1033 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1034 "TRANSACTION": lambda self: self._parse_set_transaction(), 1035 } 1036 1037 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1038 1039 TYPE_LITERAL_PARSERS = { 1040 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1041 } 1042 1043 TYPE_CONVERTER: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1044 1045 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1046 1047 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1048 1049 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1050 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1051 "ISOLATION": ( 1052 ("LEVEL", "REPEATABLE", "READ"), 1053 ("LEVEL", "READ", "COMMITTED"), 1054 ("LEVEL", "READ", "UNCOMITTED"), 1055 ("LEVEL", "SERIALIZABLE"), 1056 ), 1057 "READ": ("WRITE", "ONLY"), 1058 } 1059 1060 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1061 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1062 ) 1063 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1064 1065 CREATE_SEQUENCE: OPTIONS_TYPE = { 1066 "SCALE": ("EXTEND", "NOEXTEND"), 1067 "SHARD": ("EXTEND", "NOEXTEND"), 1068 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1069 **dict.fromkeys( 1070 ( 1071 "SESSION", 1072 "GLOBAL", 1073 "KEEP", 1074 "NOKEEP", 1075 "ORDER", 1076 "NOORDER", 1077 "NOCACHE", 1078 "CYCLE", 1079 "NOCYCLE", 1080 "NOMINVALUE", 1081 "NOMAXVALUE", 1082 "NOSCALE", 1083 "NOSHARD", 1084 ), 1085 tuple(), 1086 ), 1087 } 1088 1089 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1090 1091 USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple()) 1092 1093 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1094 1095 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1096 1097 CLONE_KEYWORDS = {"CLONE", "COPY"} 1098 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1099 1100 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1101 1102 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1103 1104 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1105 1106 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True
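
    # Illustrative sketch (not part of sqlglot): these class-level flags are how
    # dialects tune parsing behavior, typically via a nested Parser subclass:
    #
    #   from sqlglot import parser
    #   from sqlglot.dialects.dialect import Dialect
    #
    #   class MyDialect(Dialect):
    #       class Parser(parser.Parser):
    #           STRING_ALIASES = True  # allow SELECT COUNT(*) 'count'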

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
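
    # Illustrative example (not part of the module): parse() is usually driven
    # through a Dialect, which supplies the token stream. Assuming the default
    # dialect:
    #
    #   from sqlglot.dialects.dialect import Dialect
    #
    #   dialect = Dialect.get_or_raise(None)
    #   expressions = dialect.parser().parse(dialect.tokenize("SELECT 1; SELECT 2"))
    #   assert len(expressions) == 2  # one syntax tree per statement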
1254 """ 1255 errors = [] 1256 for expression_type in ensure_list(expression_types): 1257 parser = self.EXPRESSION_PARSERS.get(expression_type) 1258 if not parser: 1259 raise TypeError(f"No parser registered for {expression_type}") 1260 1261 try: 1262 return self._parse(parser, raw_tokens, sql) 1263 except ParseError as e: 1264 e.errors[0]["into_expression"] = expression_type 1265 errors.append(e) 1266 1267 raise ParseError( 1268 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1269 errors=merge_errors(errors), 1270 ) from errors[-1] 1271 1272 def _parse( 1273 self, 1274 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1275 raw_tokens: t.List[Token], 1276 sql: t.Optional[str] = None, 1277 ) -> t.List[t.Optional[exp.Expression]]: 1278 self.reset() 1279 self.sql = sql or "" 1280 1281 total = len(raw_tokens) 1282 chunks: t.List[t.List[Token]] = [[]] 1283 1284 for i, token in enumerate(raw_tokens): 1285 if token.token_type == TokenType.SEMICOLON: 1286 if token.comments: 1287 chunks.append([token]) 1288 1289 if i < total - 1: 1290 chunks.append([]) 1291 else: 1292 chunks[-1].append(token) 1293 1294 expressions = [] 1295 1296 for tokens in chunks: 1297 self._index = -1 1298 self._tokens = tokens 1299 self._advance() 1300 1301 expressions.append(parse_method(self)) 1302 1303 if self._index < len(self._tokens): 1304 self.raise_error("Invalid expression / Unexpected token") 1305 1306 self.check_errors() 1307 1308 return expressions 1309 1310 def check_errors(self) -> None: 1311 """Logs or raises any found errors, depending on the chosen error level setting.""" 1312 if self.error_level == ErrorLevel.WARN: 1313 for error in self.errors: 1314 logger.error(str(error)) 1315 elif self.error_level == ErrorLevel.RAISE and self.errors: 1316 raise ParseError( 1317 concat_messages(self.errors, self.max_errors), 1318 errors=merge_errors(self.errors), 1319 ) 1320 1321 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1322 """ 1323 Appends an error in the list of recorded errors or raises it, depending on the chosen 1324 error level setting. 1325 """ 1326 token = token or self._curr or self._prev or Token.string("") 1327 start = token.start 1328 end = token.end + 1 1329 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1330 highlight = self.sql[start:end] 1331 end_context = self.sql[end : end + self.error_message_context] 1332 1333 error = ParseError.new( 1334 f"{message}. Line {token.line}, Col: {token.col}.\n" 1335 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1336 description=message, 1337 line=token.line, 1338 col=token.col, 1339 start_context=start_context, 1340 highlight=highlight, 1341 end_context=end_context, 1342 ) 1343 1344 if self.error_level == ErrorLevel.IMMEDIATE: 1345 raise error 1346 1347 self.errors.append(error) 1348 1349 def expression( 1350 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1351 ) -> E: 1352 """ 1353 Creates a new, validated Expression. 1354 1355 Args: 1356 exp_class: The expression class to instantiate. 1357 comments: An optional list of comments to attach to the expression. 1358 kwargs: The arguments to set for the expression along with their respective values. 1359 1360 Returns: 1361 The target expression. 
1362 """ 1363 instance = exp_class(**kwargs) 1364 instance.add_comments(comments) if comments else self._add_comments(instance) 1365 return self.validate_expression(instance) 1366 1367 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1368 if expression and self._prev_comments: 1369 expression.add_comments(self._prev_comments) 1370 self._prev_comments = None 1371 1372 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1373 """ 1374 Validates an Expression, making sure that all its mandatory arguments are set. 1375 1376 Args: 1377 expression: The expression to validate. 1378 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1379 1380 Returns: 1381 The validated expression. 1382 """ 1383 if self.error_level != ErrorLevel.IGNORE: 1384 for error_message in expression.error_messages(args): 1385 self.raise_error(error_message) 1386 1387 return expression 1388 1389 def _find_sql(self, start: Token, end: Token) -> str: 1390 return self.sql[start.start : end.end + 1] 1391 1392 def _is_connected(self) -> bool: 1393 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1394 1395 def _advance(self, times: int = 1) -> None: 1396 self._index += times 1397 self._curr = seq_get(self._tokens, self._index) 1398 self._next = seq_get(self._tokens, self._index + 1) 1399 1400 if self._index > 0: 1401 self._prev = self._tokens[self._index - 1] 1402 self._prev_comments = self._prev.comments 1403 else: 1404 self._prev = None 1405 self._prev_comments = None 1406 1407 def _retreat(self, index: int) -> None: 1408 if index != self._index: 1409 self._advance(index - self._index) 1410 1411 def _warn_unsupported(self) -> None: 1412 if len(self._tokens) <= 1: 1413 return 1414 1415 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1416 # interested in emitting a warning for the one being currently processed. 1417 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1418 1419 logger.warning( 1420 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1421 ) 1422 1423 def _parse_command(self) -> exp.Command: 1424 self._warn_unsupported() 1425 return self.expression( 1426 exp.Command, this=self._prev.text.upper(), expression=self._parse_string() 1427 ) 1428 1429 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1430 """ 1431 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an
        error. This behavior can be different depending on the user-set ErrorLevel, so _try_parse
        aims to solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
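
    # Illustrative example (not part of the module): _parse_comment handles
    # statements such as:
    #
    #   import sqlglot
    #
    #   sqlglot.parse_one("COMMENT ON TABLE db.t IS 'fact table'")  # exp.Comment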

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )
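
    # Illustrative example (not part of the module): _parse_create produces an
    # exp.Create whose "kind" arg distinguishes the creatable:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   create = sqlglot.parse_one("CREATE TABLE t (a INT)")
    #   assert isinstance(create, exp.Create) and create.kind == "TABLE"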
self._match_text_seq("CACHE"): 1755 # T-SQL allows empty CACHE which is initialized dynamically 1756 seq.set("cache", self._parse_number() or True) 1757 elif self._match_text_seq("OWNED", "BY"): 1758 # "OWNED BY NONE" is the default 1759 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1760 else: 1761 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1762 if opt: 1763 options.append(opt) 1764 else: 1765 break 1766 1767 seq.set("options", options if options else None) 1768 return None if self._index == index else seq 1769 1770 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1771 # only used for teradata currently 1772 self._match(TokenType.COMMA) 1773 1774 kwargs = { 1775 "no": self._match_text_seq("NO"), 1776 "dual": self._match_text_seq("DUAL"), 1777 "before": self._match_text_seq("BEFORE"), 1778 "default": self._match_text_seq("DEFAULT"), 1779 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1780 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1781 "after": self._match_text_seq("AFTER"), 1782 "minimum": self._match_texts(("MIN", "MINIMUM")), 1783 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1784 } 1785 1786 if self._match_texts(self.PROPERTY_PARSERS): 1787 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1788 try: 1789 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1790 except TypeError: 1791 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1792 1793 return None 1794 1795 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1796 return self._parse_wrapped_csv(self._parse_property) 1797 1798 def _parse_property(self) -> t.Optional[exp.Expression]: 1799 if self._match_texts(self.PROPERTY_PARSERS): 1800 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1801 1802 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1803 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1804 1805 if self._match_text_seq("COMPOUND", "SORTKEY"): 1806 return self._parse_sortkey(compound=True) 1807 1808 if self._match_text_seq("SQL", "SECURITY"): 1809 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1810 1811 index = self._index 1812 key = self._parse_column() 1813 1814 if not self._match(TokenType.EQ): 1815 self._retreat(index) 1816 return self._parse_sequence_properties() 1817 1818 return self.expression( 1819 exp.Property, 1820 this=key.to_dot() if isinstance(key, exp.Column) else key, 1821 value=self._parse_bitwise() or self._parse_var(any_token=True), 1822 ) 1823 1824 def _parse_stored(self) -> exp.FileFormatProperty: 1825 self._match(TokenType.ALIAS) 1826 1827 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1828 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1829 1830 return self.expression( 1831 exp.FileFormatProperty, 1832 this=( 1833 self.expression( 1834 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1835 ) 1836 if input_format or output_format 1837 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1838 ), 1839 ) 1840 1841 def _parse_unquoted_field(self): 1842 field = self._parse_field() 1843 if isinstance(field, exp.Identifier) and not field.quoted: 1844 field = exp.var(field) 1845 1846 return field 1847 1848 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1849 self._match(TokenType.EQ) 1850 
self._match(TokenType.ALIAS) 1851 1852 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1853 1854 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1855 properties = [] 1856 while True: 1857 if before: 1858 prop = self._parse_property_before() 1859 else: 1860 prop = self._parse_property() 1861 if not prop: 1862 break 1863 for p in ensure_list(prop): 1864 properties.append(p) 1865 1866 if properties: 1867 return self.expression(exp.Properties, expressions=properties) 1868 1869 return None 1870 1871 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1872 return self.expression( 1873 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1874 ) 1875 1876 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1877 if self._index >= 2: 1878 pre_volatile_token = self._tokens[self._index - 2] 1879 else: 1880 pre_volatile_token = None 1881 1882 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1883 return exp.VolatileProperty() 1884 1885 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1886 1887 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1888 self._match_pair(TokenType.EQ, TokenType.ON) 1889 1890 prop = self.expression(exp.WithSystemVersioningProperty) 1891 if self._match(TokenType.L_PAREN): 1892 self._match_text_seq("HISTORY_TABLE", "=") 1893 prop.set("this", self._parse_table_parts()) 1894 1895 if self._match(TokenType.COMMA): 1896 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1897 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1898 1899 self._match_r_paren() 1900 1901 return prop 1902 1903 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1904 if self._match(TokenType.L_PAREN, advance=False): 1905 return self._parse_wrapped_properties() 1906 1907 if self._match_text_seq("JOURNAL"): 1908 return self._parse_withjournaltable() 1909 1910 if self._match_texts(self.VIEW_ATTRIBUTES): 1911 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1912 1913 if self._match_text_seq("DATA"): 1914 return self._parse_withdata(no=False) 1915 elif self._match_text_seq("NO", "DATA"): 1916 return self._parse_withdata(no=True) 1917 1918 if not self._next: 1919 return None 1920 1921 return self._parse_withisolatedloading() 1922 1923 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1924 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1925 self._match(TokenType.EQ) 1926 1927 user = self._parse_id_var() 1928 self._match(TokenType.PARAMETER) 1929 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1930 1931 if not user or not host: 1932 return None 1933 1934 return exp.DefinerProperty(this=f"{user}@{host}") 1935 1936 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1937 self._match(TokenType.TABLE) 1938 self._match(TokenType.EQ) 1939 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1940 1941 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1942 return self.expression(exp.LogProperty, no=no) 1943 1944 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1945 return self.expression(exp.JournalProperty, **kwargs) 1946 1947 def _parse_checksum(self) -> exp.ChecksumProperty: 1948 self._match(TokenType.EQ) 1949 1950 on = None 1951 if self._match(TokenType.ON): 1952 on = True 
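# Added note (hedged): in Teradata-style DDL, for example, CHECKSUM = ON yields ChecksumProperty(on=True), CHECKSUM = OFF yields on=False, and CHECKSUM = DEFAULT leaves "on" unset while the trailing DEFAULT match below sets default=True.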
1953 elif self._match_text_seq("OFF"): 1954 on = False 1955 1956 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1957 1958 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1959 return self.expression( 1960 exp.Cluster, 1961 expressions=( 1962 self._parse_wrapped_csv(self._parse_ordered) 1963 if wrapped 1964 else self._parse_csv(self._parse_ordered) 1965 ), 1966 ) 1967 1968 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1969 self._match_text_seq("BY") 1970 1971 self._match_l_paren() 1972 expressions = self._parse_csv(self._parse_column) 1973 self._match_r_paren() 1974 1975 if self._match_text_seq("SORTED", "BY"): 1976 self._match_l_paren() 1977 sorted_by = self._parse_csv(self._parse_ordered) 1978 self._match_r_paren() 1979 else: 1980 sorted_by = None 1981 1982 self._match(TokenType.INTO) 1983 buckets = self._parse_number() 1984 self._match_text_seq("BUCKETS") 1985 1986 return self.expression( 1987 exp.ClusteredByProperty, 1988 expressions=expressions, 1989 sorted_by=sorted_by, 1990 buckets=buckets, 1991 ) 1992 1993 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1994 if not self._match_text_seq("GRANTS"): 1995 self._retreat(self._index - 1) 1996 return None 1997 1998 return self.expression(exp.CopyGrantsProperty) 1999 2000 def _parse_freespace(self) -> exp.FreespaceProperty: 2001 self._match(TokenType.EQ) 2002 return self.expression( 2003 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2004 ) 2005 2006 def _parse_mergeblockratio( 2007 self, no: bool = False, default: bool = False 2008 ) -> exp.MergeBlockRatioProperty: 2009 if self._match(TokenType.EQ): 2010 return self.expression( 2011 exp.MergeBlockRatioProperty, 2012 this=self._parse_number(), 2013 percent=self._match(TokenType.PERCENT), 2014 ) 2015 2016 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2017 2018 def _parse_datablocksize( 2019 self, 2020 default: t.Optional[bool] = None, 2021 minimum: t.Optional[bool] = None, 2022 maximum: t.Optional[bool] = None, 2023 ) -> exp.DataBlocksizeProperty: 2024 self._match(TokenType.EQ) 2025 size = self._parse_number() 2026 2027 units = None 2028 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2029 units = self._prev.text 2030 2031 return self.expression( 2032 exp.DataBlocksizeProperty, 2033 size=size, 2034 units=units, 2035 default=default, 2036 minimum=minimum, 2037 maximum=maximum, 2038 ) 2039 2040 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2041 self._match(TokenType.EQ) 2042 always = self._match_text_seq("ALWAYS") 2043 manual = self._match_text_seq("MANUAL") 2044 never = self._match_text_seq("NEVER") 2045 default = self._match_text_seq("DEFAULT") 2046 2047 autotemp = None 2048 if self._match_text_seq("AUTOTEMP"): 2049 autotemp = self._parse_schema() 2050 2051 return self.expression( 2052 exp.BlockCompressionProperty, 2053 always=always, 2054 manual=manual, 2055 never=never, 2056 default=default, 2057 autotemp=autotemp, 2058 ) 2059 2060 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2061 index = self._index 2062 no = self._match_text_seq("NO") 2063 concurrent = self._match_text_seq("CONCURRENT") 2064 2065 if not self._match_text_seq("ISOLATED", "LOADING"): 2066 self._retreat(index) 2067 return None 2068 2069 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2070 return self.expression( 2071 exp.IsolatedLoadingProperty, no=no, 
concurrent=concurrent, target=target 2072 ) 2073 2074 def _parse_locking(self) -> exp.LockingProperty: 2075 if self._match(TokenType.TABLE): 2076 kind = "TABLE" 2077 elif self._match(TokenType.VIEW): 2078 kind = "VIEW" 2079 elif self._match(TokenType.ROW): 2080 kind = "ROW" 2081 elif self._match_text_seq("DATABASE"): 2082 kind = "DATABASE" 2083 else: 2084 kind = None 2085 2086 if kind in ("DATABASE", "TABLE", "VIEW"): 2087 this = self._parse_table_parts() 2088 else: 2089 this = None 2090 2091 if self._match(TokenType.FOR): 2092 for_or_in = "FOR" 2093 elif self._match(TokenType.IN): 2094 for_or_in = "IN" 2095 else: 2096 for_or_in = None 2097 2098 if self._match_text_seq("ACCESS"): 2099 lock_type = "ACCESS" 2100 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2101 lock_type = "EXCLUSIVE" 2102 elif self._match_text_seq("SHARE"): 2103 lock_type = "SHARE" 2104 elif self._match_text_seq("READ"): 2105 lock_type = "READ" 2106 elif self._match_text_seq("WRITE"): 2107 lock_type = "WRITE" 2108 elif self._match_text_seq("CHECKSUM"): 2109 lock_type = "CHECKSUM" 2110 else: 2111 lock_type = None 2112 2113 override = self._match_text_seq("OVERRIDE") 2114 2115 return self.expression( 2116 exp.LockingProperty, 2117 this=this, 2118 kind=kind, 2119 for_or_in=for_or_in, 2120 lock_type=lock_type, 2121 override=override, 2122 ) 2123 2124 def _parse_partition_by(self) -> t.List[exp.Expression]: 2125 if self._match(TokenType.PARTITION_BY): 2126 return self._parse_csv(self._parse_conjunction) 2127 return [] 2128 2129 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2130 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2131 if self._match_text_seq("MINVALUE"): 2132 return exp.var("MINVALUE") 2133 if self._match_text_seq("MAXVALUE"): 2134 return exp.var("MAXVALUE") 2135 return self._parse_bitwise() 2136 2137 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2138 expression = None 2139 from_expressions = None 2140 to_expressions = None 2141 2142 if self._match(TokenType.IN): 2143 this = self._parse_wrapped_csv(self._parse_bitwise) 2144 elif self._match(TokenType.FROM): 2145 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2146 self._match_text_seq("TO") 2147 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2148 elif self._match_text_seq("WITH", "(", "MODULUS"): 2149 this = self._parse_number() 2150 self._match_text_seq(",", "REMAINDER") 2151 expression = self._parse_number() 2152 self._match_r_paren() 2153 else: 2154 self.raise_error("Failed to parse partition bound spec.") 2155 2156 return self.expression( 2157 exp.PartitionBoundSpec, 2158 this=this, 2159 expression=expression, 2160 from_expressions=from_expressions, 2161 to_expressions=to_expressions, 2162 ) 2163 2164 # https://www.postgresql.org/docs/current/sql-createtable.html 2165 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2166 if not self._match_text_seq("OF"): 2167 self._retreat(self._index - 1) 2168 return None 2169 2170 this = self._parse_table(schema=True) 2171 2172 if self._match(TokenType.DEFAULT): 2173 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2174 elif self._match_text_seq("FOR", "VALUES"): 2175 expression = self._parse_partition_bound_spec() 2176 else: 2177 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2178 2179 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2180 2181 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2182 
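# Added note (hedged, illustrative): the optional "=" below admits key/value spellings such as partitioned_by = (...) in some dialects, while _parse_schema covers Hive-style PARTITIONED BY (ds STRING) column lists and the bracket fallback handles a single partitioning expression; not an exhaustive list.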
self._match(TokenType.EQ) 2183 return self.expression( 2184 exp.PartitionedByProperty, 2185 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2186 ) 2187 2188 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2189 if self._match_text_seq("AND", "STATISTICS"): 2190 statistics = True 2191 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2192 statistics = False 2193 else: 2194 statistics = None 2195 2196 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2197 2198 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2199 if self._match_text_seq("SQL"): 2200 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2201 return None 2202 2203 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2204 if self._match_text_seq("SQL", "DATA"): 2205 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2206 return None 2207 2208 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2209 if self._match_text_seq("PRIMARY", "INDEX"): 2210 return exp.NoPrimaryIndexProperty() 2211 if self._match_text_seq("SQL"): 2212 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2213 return None 2214 2215 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2216 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2217 return exp.OnCommitProperty() 2218 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2219 return exp.OnCommitProperty(delete=True) 2220 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2221 2222 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2223 if self._match_text_seq("SQL", "DATA"): 2224 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2225 return None 2226 2227 def _parse_distkey(self) -> exp.DistKeyProperty: 2228 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2229 2230 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2231 table = self._parse_table(schema=True) 2232 2233 options = [] 2234 while self._match_texts(("INCLUDING", "EXCLUDING")): 2235 this = self._prev.text.upper() 2236 2237 id_var = self._parse_id_var() 2238 if not id_var: 2239 return None 2240 2241 options.append( 2242 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2243 ) 2244 2245 return self.expression(exp.LikeProperty, this=table, expressions=options) 2246 2247 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2248 return self.expression( 2249 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2250 ) 2251 2252 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2253 self._match(TokenType.EQ) 2254 return self.expression( 2255 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2256 ) 2257 2258 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2259 self._match_text_seq("WITH", "CONNECTION") 2260 return self.expression( 2261 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2262 ) 2263 2264 def _parse_returns(self) -> exp.ReturnsProperty: 2265 value: t.Optional[exp.Expression] 2266 is_table = self._match(TokenType.TABLE) 2267 2268 if is_table: 2269 if self._match(TokenType.LT): 2270 value = self.expression( 2271 exp.Schema, 2272 this="TABLE", 2273 expressions=self._parse_csv(self._parse_struct_types), 2274 ) 2275 if not 
self._match(TokenType.GT): 2276 self.raise_error("Expecting >") 2277 else: 2278 value = self._parse_schema(exp.var("TABLE")) 2279 else: 2280 value = self._parse_types() 2281 2282 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 2283 2284 def _parse_describe(self) -> exp.Describe: 2285 kind = self._match_set(self.CREATABLES) and self._prev.text 2286 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2287 if self._match(TokenType.DOT): 2288 style = None 2289 self._retreat(self._index - 2) 2290 this = self._parse_table(schema=True) 2291 properties = self._parse_properties() 2292 expressions = properties.expressions if properties else None 2293 return self.expression( 2294 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2295 ) 2296 2297 def _parse_insert(self) -> exp.Insert: 2298 comments = ensure_list(self._prev_comments) 2299 hint = self._parse_hint() 2300 overwrite = self._match(TokenType.OVERWRITE) 2301 ignore = self._match(TokenType.IGNORE) 2302 local = self._match_text_seq("LOCAL") 2303 alternative = None 2304 is_function = None 2305 2306 if self._match_text_seq("DIRECTORY"): 2307 this: t.Optional[exp.Expression] = self.expression( 2308 exp.Directory, 2309 this=self._parse_var_or_string(), 2310 local=local, 2311 row_format=self._parse_row_format(match_row=True), 2312 ) 2313 else: 2314 if self._match(TokenType.OR): 2315 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2316 2317 self._match(TokenType.INTO) 2318 comments += ensure_list(self._prev_comments) 2319 self._match(TokenType.TABLE) 2320 is_function = self._match(TokenType.FUNCTION) 2321 2322 this = ( 2323 self._parse_table(schema=True, parse_partition=True) 2324 if not is_function 2325 else self._parse_function() 2326 ) 2327 2328 returning = self._parse_returning() 2329 2330 return self.expression( 2331 exp.Insert, 2332 comments=comments, 2333 hint=hint, 2334 is_function=is_function, 2335 this=this, 2336 stored=self._match_text_seq("STORED") and self._parse_stored(), 2337 by_name=self._match_text_seq("BY", "NAME"), 2338 exists=self._parse_exists(), 2339 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2340 and self._parse_conjunction(), 2341 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2342 conflict=self._parse_on_conflict(), 2343 returning=returning or self._parse_returning(), 2344 overwrite=overwrite, 2345 alternative=alternative, 2346 ignore=ignore, 2347 ) 2348 2349 def _parse_kill(self) -> exp.Kill: 2350 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2351 2352 return self.expression( 2353 exp.Kill, 2354 this=self._parse_primary(), 2355 kind=kind, 2356 ) 2357 2358 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2359 conflict = self._match_text_seq("ON", "CONFLICT") 2360 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2361 2362 if not conflict and not duplicate: 2363 return None 2364 2365 conflict_keys = None 2366 constraint = None 2367 2368 if conflict: 2369 if self._match_text_seq("ON", "CONSTRAINT"): 2370 constraint = self._parse_id_var() 2371 elif self._match(TokenType.L_PAREN): 2372 conflict_keys = self._parse_csv(self._parse_id_var) 2373 self._match_r_paren() 2374 2375 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2376 if self._prev.token_type == TokenType.UPDATE: 2377 self._match(TokenType.SET) 2378 expressions = self._parse_csv(self._parse_equality) 2379 else: 2380 expressions = None 
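# Illustrative examples (added, not original commentary): for Postgres-style INSERT ... ON CONFLICT (id) DO UPDATE SET x = 1, the branches above fill conflict_keys with the parenthesized columns and expressions with the SET assignments; MySQL-style INSERT ... ON DUPLICATE KEY UPDATE x = 1 instead marks duplicate=True on the resulting exp.OnConflict.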
2381 2382 return self.expression( 2383 exp.OnConflict, 2384 duplicate=duplicate, 2385 expressions=expressions, 2386 action=action, 2387 conflict_keys=conflict_keys, 2388 constraint=constraint, 2389 ) 2390 2391 def _parse_returning(self) -> t.Optional[exp.Returning]: 2392 if not self._match(TokenType.RETURNING): 2393 return None 2394 return self.expression( 2395 exp.Returning, 2396 expressions=self._parse_csv(self._parse_expression), 2397 into=self._match(TokenType.INTO) and self._parse_table_part(), 2398 ) 2399 2400 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2401 if not self._match(TokenType.FORMAT): 2402 return None 2403 return self._parse_row_format() 2404 2405 def _parse_row_format( 2406 self, match_row: bool = False 2407 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2408 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2409 return None 2410 2411 if self._match_text_seq("SERDE"): 2412 this = self._parse_string() 2413 2414 serde_properties = None 2415 if self._match(TokenType.SERDE_PROPERTIES): 2416 serde_properties = self.expression( 2417 exp.SerdeProperties, expressions=self._parse_wrapped_properties() 2418 ) 2419 2420 return self.expression( 2421 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2422 ) 2423 2424 self._match_text_seq("DELIMITED") 2425 2426 kwargs = {} 2427 2428 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2429 kwargs["fields"] = self._parse_string() 2430 if self._match_text_seq("ESCAPED", "BY"): 2431 kwargs["escaped"] = self._parse_string() 2432 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2433 kwargs["collection_items"] = self._parse_string() 2434 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2435 kwargs["map_keys"] = self._parse_string() 2436 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2437 kwargs["lines"] = self._parse_string() 2438 if self._match_text_seq("NULL", "DEFINED", "AS"): 2439 kwargs["null"] = self._parse_string() 2440 2441 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2442 2443 def _parse_load(self) -> exp.LoadData | exp.Command: 2444 if self._match_text_seq("DATA"): 2445 local = self._match_text_seq("LOCAL") 2446 self._match_text_seq("INPATH") 2447 inpath = self._parse_string() 2448 overwrite = self._match(TokenType.OVERWRITE) 2449 self._match_pair(TokenType.INTO, TokenType.TABLE) 2450 2451 return self.expression( 2452 exp.LoadData, 2453 this=self._parse_table(schema=True), 2454 local=local, 2455 overwrite=overwrite, 2456 inpath=inpath, 2457 partition=self._parse_partition(), 2458 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2459 serde=self._match_text_seq("SERDE") and self._parse_string(), 2460 ) 2461 return self._parse_as_command(self._prev) 2462 2463 def _parse_delete(self) -> exp.Delete: 2464 # This handles MySQL's "Multiple-Table Syntax" 2465 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2466 tables = None 2467 comments = self._prev_comments 2468 if not self._match(TokenType.FROM, advance=False): 2469 tables = self._parse_csv(self._parse_table) or None 2470 2471 returning = self._parse_returning() 2472 2473 return self.expression( 2474 exp.Delete, 2475 comments=comments, 2476 tables=tables, 2477 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2478 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2479 where=self._parse_where(), 2480 returning=returning 
or self._parse_returning(), 2481 limit=self._parse_limit(), 2482 ) 2483 2484 def _parse_update(self) -> exp.Update: 2485 comments = self._prev_comments 2486 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2487 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2488 returning = self._parse_returning() 2489 return self.expression( 2490 exp.Update, 2491 comments=comments, 2492 **{ # type: ignore 2493 "this": this, 2494 "expressions": expressions, 2495 "from": self._parse_from(joins=True), 2496 "where": self._parse_where(), 2497 "returning": returning or self._parse_returning(), 2498 "order": self._parse_order(), 2499 "limit": self._parse_limit(), 2500 }, 2501 ) 2502 2503 def _parse_uncache(self) -> exp.Uncache: 2504 if not self._match(TokenType.TABLE): 2505 self.raise_error("Expecting TABLE after UNCACHE") 2506 2507 return self.expression( 2508 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2509 ) 2510 2511 def _parse_cache(self) -> exp.Cache: 2512 lazy = self._match_text_seq("LAZY") 2513 self._match(TokenType.TABLE) 2514 table = self._parse_table(schema=True) 2515 2516 options = [] 2517 if self._match_text_seq("OPTIONS"): 2518 self._match_l_paren() 2519 k = self._parse_string() 2520 self._match(TokenType.EQ) 2521 v = self._parse_string() 2522 options = [k, v] 2523 self._match_r_paren() 2524 2525 self._match(TokenType.ALIAS) 2526 return self.expression( 2527 exp.Cache, 2528 this=table, 2529 lazy=lazy, 2530 options=options, 2531 expression=self._parse_select(nested=True), 2532 ) 2533 2534 def _parse_partition(self) -> t.Optional[exp.Partition]: 2535 if not self._match(TokenType.PARTITION): 2536 return None 2537 2538 return self.expression( 2539 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2540 ) 2541 2542 def _parse_value(self) -> t.Optional[exp.Tuple]: 2543 if self._match(TokenType.L_PAREN): 2544 expressions = self._parse_csv(self._parse_expression) 2545 self._match_r_paren() 2546 return self.expression(exp.Tuple, expressions=expressions) 2547 2548 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
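# For instance (added, illustrative): in such a dialect VALUES 1, 2 parses as two single-element tuples -- each bare expression is wrapped in exp.Tuple below -- whereas VALUES (1, 2) takes the parenthesized branch above and yields one two-element tuple.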
2549 expression = self._parse_expression() 2550 if expression: 2551 return self.expression(exp.Tuple, expressions=[expression]) 2552 return None 2553 2554 def _parse_projections(self) -> t.List[exp.Expression]: 2555 return self._parse_expressions() 2556 2557 def _parse_select( 2558 self, 2559 nested: bool = False, 2560 table: bool = False, 2561 parse_subquery_alias: bool = True, 2562 parse_set_operation: bool = True, 2563 ) -> t.Optional[exp.Expression]: 2564 cte = self._parse_with() 2565 2566 if cte: 2567 this = self._parse_statement() 2568 2569 if not this: 2570 self.raise_error("Failed to parse any statement following CTE") 2571 return cte 2572 2573 if "with" in this.arg_types: 2574 this.set("with", cte) 2575 else: 2576 self.raise_error(f"{this.key} does not support CTE") 2577 this = cte 2578 2579 return this 2580 2581 # duckdb supports leading with FROM x 2582 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2583 2584 if self._match(TokenType.SELECT): 2585 comments = self._prev_comments 2586 2587 hint = self._parse_hint() 2588 all_ = self._match(TokenType.ALL) 2589 distinct = self._match_set(self.DISTINCT_TOKENS) 2590 2591 kind = ( 2592 self._match(TokenType.ALIAS) 2593 and self._match_texts(("STRUCT", "VALUE")) 2594 and self._prev.text.upper() 2595 ) 2596 2597 if distinct: 2598 distinct = self.expression( 2599 exp.Distinct, 2600 on=self._parse_value() if self._match(TokenType.ON) else None, 2601 ) 2602 2603 if all_ and distinct: 2604 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2605 2606 limit = self._parse_limit(top=True) 2607 projections = self._parse_projections() 2608 2609 this = self.expression( 2610 exp.Select, 2611 kind=kind, 2612 hint=hint, 2613 distinct=distinct, 2614 expressions=projections, 2615 limit=limit, 2616 ) 2617 this.comments = comments 2618 2619 into = self._parse_into() 2620 if into: 2621 this.set("into", into) 2622 2623 if not from_: 2624 from_ = self._parse_from() 2625 2626 if from_: 2627 this.set("from", from_) 2628 2629 this = self._parse_query_modifiers(this) 2630 elif (table or nested) and self._match(TokenType.L_PAREN): 2631 if self._match(TokenType.PIVOT): 2632 this = self._parse_simplified_pivot() 2633 elif self._match(TokenType.FROM): 2634 this = exp.select("*").from_( 2635 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2636 ) 2637 else: 2638 this = ( 2639 self._parse_table() 2640 if table 2641 else self._parse_select(nested=True, parse_set_operation=False) 2642 ) 2643 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2644 2645 self._match_r_paren() 2646 2647 # We return early here so that the UNION isn't attached to the subquery by the 2648 # following call to _parse_set_operations, but instead becomes the parent node 2649 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2650 elif self._match(TokenType.VALUES, advance=False): 2651 this = self._parse_derived_table_values() 2652 elif from_: 2653 this = exp.select("*").from_(from_.this, copy=False) 2654 else: 2655 this = None 2656 2657 if parse_set_operation: 2658 return self._parse_set_operations(this) 2659 return this 2660 2661 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2662 if not skip_with_token and not self._match(TokenType.WITH): 2663 return None 2664 2665 comments = self._prev_comments 2666 recursive = self._match(TokenType.RECURSIVE) 2667 2668 expressions = [] 2669 while True: 2670 expressions.append(self._parse_cte()) 2671 2672 if not 
self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2673 break 2674 else: 2675 self._match(TokenType.WITH) 2676 2677 return self.expression( 2678 exp.With, comments=comments, expressions=expressions, recursive=recursive 2679 ) 2680 2681 def _parse_cte(self) -> exp.CTE: 2682 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2683 if not alias or not alias.this: 2684 self.raise_error("Expected CTE to have alias") 2685 2686 self._match(TokenType.ALIAS) 2687 2688 if self._match_text_seq("NOT", "MATERIALIZED"): 2689 materialized = False 2690 elif self._match_text_seq("MATERIALIZED"): 2691 materialized = True 2692 else: 2693 materialized = None 2694 2695 return self.expression( 2696 exp.CTE, 2697 this=self._parse_wrapped(self._parse_statement), 2698 alias=alias, 2699 materialized=materialized, 2700 ) 2701 2702 def _parse_table_alias( 2703 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2704 ) -> t.Optional[exp.TableAlias]: 2705 any_token = self._match(TokenType.ALIAS) 2706 alias = ( 2707 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2708 or self._parse_string_as_identifier() 2709 ) 2710 2711 index = self._index 2712 if self._match(TokenType.L_PAREN): 2713 columns = self._parse_csv(self._parse_function_parameter) 2714 self._match_r_paren() if columns else self._retreat(index) 2715 else: 2716 columns = None 2717 2718 if not alias and not columns: 2719 return None 2720 2721 return self.expression(exp.TableAlias, this=alias, columns=columns) 2722 2723 def _parse_subquery( 2724 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2725 ) -> t.Optional[exp.Subquery]: 2726 if not this: 2727 return None 2728 2729 return self.expression( 2730 exp.Subquery, 2731 this=this, 2732 pivots=self._parse_pivots(), 2733 alias=self._parse_table_alias() if parse_alias else None, 2734 ) 2735 2736 def _implicit_unnests_to_explicit(self, this: E) -> E: 2737 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2738 2739 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2740 for i, join in enumerate(this.args.get("joins") or []): 2741 table = join.this 2742 normalized_table = table.copy() 2743 normalized_table.meta["maybe_column"] = True 2744 normalized_table = _norm(normalized_table, dialect=self.dialect) 2745 2746 if isinstance(table, exp.Table) and not join.args.get("on"): 2747 if normalized_table.parts[0].name in refs: 2748 table_as_column = table.to_column() 2749 unnest = exp.Unnest(expressions=[table_as_column]) 2750 2751 # Table.to_column creates a parent Alias node that we want to convert to 2752 # a TableAlias and attach to the Unnest, so it matches the parser's output 2753 if isinstance(table.args.get("alias"), exp.TableAlias): 2754 table_as_column.replace(table_as_column.this) 2755 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2756 2757 table.replace(unnest) 2758 2759 refs.add(normalized_table.alias_or_name) 2760 2761 return this 2762 2763 def _parse_query_modifiers( 2764 self, this: t.Optional[exp.Expression] 2765 ) -> t.Optional[exp.Expression]: 2766 if isinstance(this, (exp.Query, exp.Table)): 2767 for join in self._parse_joins(): 2768 this.append("joins", join) 2769 for lateral in iter(self._parse_lateral, None): 2770 this.append("laterals", lateral) 2771 2772 while True: 2773 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2774 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2775 key, expression = parser(self) 
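# Added reading (hedged): when the modifier just parsed is a LIMIT, the block below splits any combined offset out into a standalone exp.Offset node and, when such an offset exists, transfers ClickHouse-style LIMIT ... BY expressions onto it, so both pieces end up attached to the query node.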
2776 2777 if expression: 2778 this.set(key, expression) 2779 if key == "limit": 2780 offset = expression.args.pop("offset", None) 2781 2782 if offset: 2783 offset = exp.Offset(expression=offset) 2784 this.set("offset", offset) 2785 2786 limit_by_expressions = expression.expressions 2787 expression.set("expressions", None) 2788 offset.set("expressions", limit_by_expressions) 2789 continue 2790 break 2791 2792 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2793 this = self._implicit_unnests_to_explicit(this) 2794 2795 return this 2796 2797 def _parse_hint(self) -> t.Optional[exp.Hint]: 2798 if self._match(TokenType.HINT): 2799 hints = [] 2800 for hint in iter( 2801 lambda: self._parse_csv( 2802 lambda: self._parse_function() or self._parse_var(upper=True) 2803 ), 2804 [], 2805 ): 2806 hints.extend(hint) 2807 2808 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2809 self.raise_error("Expected */ after HINT") 2810 2811 return self.expression(exp.Hint, expressions=hints) 2812 2813 return None 2814 2815 def _parse_into(self) -> t.Optional[exp.Into]: 2816 if not self._match(TokenType.INTO): 2817 return None 2818 2819 temp = self._match(TokenType.TEMPORARY) 2820 unlogged = self._match_text_seq("UNLOGGED") 2821 self._match(TokenType.TABLE) 2822 2823 return self.expression( 2824 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2825 ) 2826 2827 def _parse_from( 2828 self, joins: bool = False, skip_from_token: bool = False 2829 ) -> t.Optional[exp.From]: 2830 if not skip_from_token and not self._match(TokenType.FROM): 2831 return None 2832 2833 return self.expression( 2834 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2835 ) 2836 2837 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2838 return self.expression( 2839 exp.MatchRecognizeMeasure, 2840 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2841 this=self._parse_expression(), 2842 ) 2843 2844 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2845 if not self._match(TokenType.MATCH_RECOGNIZE): 2846 return None 2847 2848 self._match_l_paren() 2849 2850 partition = self._parse_partition_by() 2851 order = self._parse_order() 2852 2853 measures = ( 2854 self._parse_csv(self._parse_match_recognize_measure) 2855 if self._match_text_seq("MEASURES") 2856 else None 2857 ) 2858 2859 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2860 rows = exp.var("ONE ROW PER MATCH") 2861 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2862 text = "ALL ROWS PER MATCH" 2863 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2864 text += " SHOW EMPTY MATCHES" 2865 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2866 text += " OMIT EMPTY MATCHES" 2867 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2868 text += " WITH UNMATCHED ROWS" 2869 rows = exp.var(text) 2870 else: 2871 rows = None 2872 2873 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2874 text = "AFTER MATCH SKIP" 2875 if self._match_text_seq("PAST", "LAST", "ROW"): 2876 text += " PAST LAST ROW" 2877 elif self._match_text_seq("TO", "NEXT", "ROW"): 2878 text += " TO NEXT ROW" 2879 elif self._match_text_seq("TO", "FIRST"): 2880 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2881 elif self._match_text_seq("TO", "LAST"): 2882 text += f" TO LAST {self._advance_any().text}" # type: ignore 2883 after = exp.var(text) 2884 else: 2885 after = None 2886 2887 if self._match_text_seq("PATTERN"): 2888 
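# Added note (hedged): the PATTERN body is not parsed into an AST; the loop below only balances parentheses and captures the raw source text via _find_sql, storing it verbatim as an exp.var so that a pattern like PATTERN (A B+ C) round-trips untouched.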
self._match_l_paren() 2889 2890 if not self._curr: 2891 self.raise_error("Expecting )", self._curr) 2892 2893 paren = 1 2894 start = self._curr 2895 2896 while self._curr and paren > 0: 2897 if self._curr.token_type == TokenType.L_PAREN: 2898 paren += 1 2899 if self._curr.token_type == TokenType.R_PAREN: 2900 paren -= 1 2901 2902 end = self._prev 2903 self._advance() 2904 2905 if paren > 0: 2906 self.raise_error("Expecting )", self._curr) 2907 2908 pattern = exp.var(self._find_sql(start, end)) 2909 else: 2910 pattern = None 2911 2912 define = ( 2913 self._parse_csv(self._parse_name_as_expression) 2914 if self._match_text_seq("DEFINE") 2915 else None 2916 ) 2917 2918 self._match_r_paren() 2919 2920 return self.expression( 2921 exp.MatchRecognize, 2922 partition_by=partition, 2923 order=order, 2924 measures=measures, 2925 rows=rows, 2926 after=after, 2927 pattern=pattern, 2928 define=define, 2929 alias=self._parse_table_alias(), 2930 ) 2931 2932 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2933 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2934 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2935 cross_apply = False 2936 2937 if cross_apply is not None: 2938 this = self._parse_select(table=True) 2939 view = None 2940 outer = None 2941 elif self._match(TokenType.LATERAL): 2942 this = self._parse_select(table=True) 2943 view = self._match(TokenType.VIEW) 2944 outer = self._match(TokenType.OUTER) 2945 else: 2946 return None 2947 2948 if not this: 2949 this = ( 2950 self._parse_unnest() 2951 or self._parse_function() 2952 or self._parse_id_var(any_token=False) 2953 ) 2954 2955 while self._match(TokenType.DOT): 2956 this = exp.Dot( 2957 this=this, 2958 expression=self._parse_function() or self._parse_id_var(any_token=False), 2959 ) 2960 2961 if view: 2962 table = self._parse_id_var(any_token=False) 2963 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2964 table_alias: t.Optional[exp.TableAlias] = self.expression( 2965 exp.TableAlias, this=table, columns=columns 2966 ) 2967 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2968 # We move the alias from the lateral's child node to the lateral itself 2969 table_alias = this.args["alias"].pop() 2970 else: 2971 table_alias = self._parse_table_alias() 2972 2973 return self.expression( 2974 exp.Lateral, 2975 this=this, 2976 view=view, 2977 outer=outer, 2978 alias=table_alias, 2979 cross_apply=cross_apply, 2980 ) 2981 2982 def _parse_join_parts( 2983 self, 2984 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2985 return ( 2986 self._match_set(self.JOIN_METHODS) and self._prev, 2987 self._match_set(self.JOIN_SIDES) and self._prev, 2988 self._match_set(self.JOIN_KINDS) and self._prev, 2989 ) 2990 2991 def _parse_join( 2992 self, skip_join_token: bool = False, parse_bracket: bool = False 2993 ) -> t.Optional[exp.Join]: 2994 if self._match(TokenType.COMMA): 2995 return self.expression(exp.Join, this=self._parse_table()) 2996 2997 index = self._index 2998 method, side, kind = self._parse_join_parts() 2999 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3000 join = self._match(TokenType.JOIN) 3001 3002 if not skip_join_token and not join: 3003 self._retreat(index) 3004 kind = None 3005 method = None 3006 side = None 3007 3008 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3009 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3010 3011 if not skip_join_token and 
not join and not outer_apply and not cross_apply: 3012 return None 3013 3014 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3015 3016 if method: 3017 kwargs["method"] = method.text 3018 if side: 3019 kwargs["side"] = side.text 3020 if kind: 3021 kwargs["kind"] = kind.text 3022 if hint: 3023 kwargs["hint"] = hint 3024 3025 if self._match(TokenType.MATCH_CONDITION): 3026 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3027 3028 if self._match(TokenType.ON): 3029 kwargs["on"] = self._parse_conjunction() 3030 elif self._match(TokenType.USING): 3031 kwargs["using"] = self._parse_wrapped_id_vars() 3032 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3033 kind and kind.token_type == TokenType.CROSS 3034 ): 3035 index = self._index 3036 joins: t.Optional[list] = list(self._parse_joins()) 3037 3038 if joins and self._match(TokenType.ON): 3039 kwargs["on"] = self._parse_conjunction() 3040 elif joins and self._match(TokenType.USING): 3041 kwargs["using"] = self._parse_wrapped_id_vars() 3042 else: 3043 joins = None 3044 self._retreat(index) 3045 3046 kwargs["this"].set("joins", joins if joins else None) 3047 3048 comments = [c for token in (method, side, kind) if token for c in token.comments] 3049 return self.expression(exp.Join, comments=comments, **kwargs) 3050 3051 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3052 this = self._parse_conjunction() 3053 3054 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3055 return this 3056 3057 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3058 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3059 3060 return this 3061 3062 def _parse_index_params(self) -> exp.IndexParameters: 3063 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3064 3065 if self._match(TokenType.L_PAREN, advance=False): 3066 columns = self._parse_wrapped_csv(self._parse_with_operator) 3067 else: 3068 columns = None 3069 3070 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3071 partition_by = self._parse_partition_by() 3072 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3073 tablespace = ( 3074 self._parse_var(any_token=True) 3075 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3076 else None 3077 ) 3078 where = self._parse_where() 3079 3080 return self.expression( 3081 exp.IndexParameters, 3082 using=using, 3083 columns=columns, 3084 include=include, 3085 partition_by=partition_by, 3086 where=where, 3087 with_storage=with_storage, 3088 tablespace=tablespace, 3089 ) 3090 3091 def _parse_index( 3092 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3093 ) -> t.Optional[exp.Index]: 3094 if index or anonymous: 3095 unique = None 3096 primary = None 3097 amp = None 3098 3099 self._match(TokenType.ON) 3100 self._match(TokenType.TABLE) # hive 3101 table = self._parse_table_parts(schema=True) 3102 else: 3103 unique = self._match(TokenType.UNIQUE) 3104 primary = self._match_text_seq("PRIMARY") 3105 amp = self._match_text_seq("AMP") 3106 3107 if not self._match(TokenType.INDEX): 3108 return None 3109 3110 index = self._parse_id_var() 3111 table = None 3112 3113 params = self._parse_index_params() 3114 3115 return self.expression( 3116 exp.Index, 3117 this=index, 3118 table=table, 3119 unique=unique, 3120 primary=primary, 3121 amp=amp, 3122 params=params, 3123 ) 3124 3125 def _parse_table_hints(self) -> 
t.Optional[t.List[exp.Expression]]: 3126 hints: t.List[exp.Expression] = [] 3127 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3128 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3129 hints.append( 3130 self.expression( 3131 exp.WithTableHint, 3132 expressions=self._parse_csv( 3133 lambda: self._parse_function() or self._parse_var(any_token=True) 3134 ), 3135 ) 3136 ) 3137 self._match_r_paren() 3138 else: 3139 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3140 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3141 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3142 3143 self._match_texts(("INDEX", "KEY")) 3144 if self._match(TokenType.FOR): 3145 hint.set("target", self._advance_any() and self._prev.text.upper()) 3146 3147 hint.set("expressions", self._parse_wrapped_id_vars()) 3148 hints.append(hint) 3149 3150 return hints or None 3151 3152 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3153 return ( 3154 (not schema and self._parse_function(optional_parens=False)) 3155 or self._parse_id_var(any_token=False) 3156 or self._parse_string_as_identifier() 3157 or self._parse_placeholder() 3158 ) 3159 3160 def _parse_table_parts( 3161 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3162 ) -> exp.Table: 3163 catalog = None 3164 db = None 3165 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3166 3167 while self._match(TokenType.DOT): 3168 if catalog: 3169 # This allows nesting the table in arbitrarily many dot expressions if needed 3170 table = self.expression( 3171 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3172 ) 3173 else: 3174 catalog = db 3175 db = table 3176 # "" used for tsql FROM a..b case 3177 table = self._parse_table_part(schema=schema) or "" 3178 3179 if ( 3180 wildcard 3181 and self._is_connected() 3182 and (isinstance(table, exp.Identifier) or not table) 3183 and self._match(TokenType.STAR) 3184 ): 3185 if isinstance(table, exp.Identifier): 3186 table.args["this"] += "*" 3187 else: 3188 table = exp.Identifier(this="*") 3189 3190 # We bubble up comments from the Identifier to the Table 3191 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3192 3193 if is_db_reference: 3194 catalog = db 3195 db = table 3196 table = None 3197 3198 if not table and not is_db_reference: 3199 self.raise_error(f"Expected table name but got {self._curr}") 3200 if not db and is_db_reference: 3201 self.raise_error(f"Expected database name but got {self._curr}") 3202 3203 return self.expression( 3204 exp.Table, 3205 comments=comments, 3206 this=table, 3207 db=db, 3208 catalog=catalog, 3209 pivots=self._parse_pivots(), 3210 ) 3211 3212 def _parse_table( 3213 self, 3214 schema: bool = False, 3215 joins: bool = False, 3216 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3217 parse_bracket: bool = False, 3218 is_db_reference: bool = False, 3219 parse_partition: bool = False, 3220 ) -> t.Optional[exp.Expression]: 3221 lateral = self._parse_lateral() 3222 if lateral: 3223 return lateral 3224 3225 unnest = self._parse_unnest() 3226 if unnest: 3227 return unnest 3228 3229 values = self._parse_derived_table_values() 3230 if values: 3231 return values 3232 3233 subquery = self._parse_select(table=True) 3234 if subquery: 3235 if not subquery.args.get("pivots"): 3236 subquery.set("pivots", self._parse_pivots()) 3237 return subquery 3238 3239 bracket = parse_bracket and 
self._parse_bracket(None) 3240 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3241 3242 only = self._match(TokenType.ONLY) 3243 3244 this = t.cast( 3245 exp.Expression, 3246 bracket 3247 or self._parse_bracket( 3248 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3249 ), 3250 ) 3251 3252 if only: 3253 this.set("only", only) 3254 3255 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3256 self._match_text_seq("*") 3257 3258 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3259 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3260 this.set("partition", self._parse_partition()) 3261 3262 if schema: 3263 return self._parse_schema(this=this) 3264 3265 version = self._parse_version() 3266 3267 if version: 3268 this.set("version", version) 3269 3270 if self.dialect.ALIAS_POST_TABLESAMPLE: 3271 table_sample = self._parse_table_sample() 3272 3273 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3274 if alias: 3275 this.set("alias", alias) 3276 3277 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3278 return self.expression( 3279 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3280 ) 3281 3282 this.set("hints", self._parse_table_hints()) 3283 3284 if not this.args.get("pivots"): 3285 this.set("pivots", self._parse_pivots()) 3286 3287 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3288 table_sample = self._parse_table_sample() 3289 3290 if table_sample: 3291 table_sample.set("this", this) 3292 this = table_sample 3293 3294 if joins: 3295 for join in self._parse_joins(): 3296 this.append("joins", join) 3297 3298 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3299 this.set("ordinality", True) 3300 this.set("alias", self._parse_table_alias()) 3301 3302 return this 3303 3304 def _parse_version(self) -> t.Optional[exp.Version]: 3305 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3306 this = "TIMESTAMP" 3307 elif self._match(TokenType.VERSION_SNAPSHOT): 3308 this = "VERSION" 3309 else: 3310 return None 3311 3312 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3313 kind = self._prev.text.upper() 3314 start = self._parse_bitwise() 3315 self._match_texts(("TO", "AND")) 3316 end = self._parse_bitwise() 3317 expression: t.Optional[exp.Expression] = self.expression( 3318 exp.Tuple, expressions=[start, end] 3319 ) 3320 elif self._match_text_seq("CONTAINED", "IN"): 3321 kind = "CONTAINED IN" 3322 expression = self.expression( 3323 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3324 ) 3325 elif self._match(TokenType.ALL): 3326 kind = "ALL" 3327 expression = None 3328 else: 3329 self._match_text_seq("AS", "OF") 3330 kind = "AS OF" 3331 expression = self._parse_type() 3332 3333 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3334 3335 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3336 if not self._match(TokenType.UNNEST): 3337 return None 3338 3339 expressions = self._parse_wrapped_csv(self._parse_equality) 3340 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3341 3342 alias = self._parse_table_alias() if with_alias else None 3343 3344 if alias: 3345 if self.dialect.UNNEST_COLUMN_ONLY: 3346 if alias.args.get("columns"): 3347 self.raise_error("Unexpected extra column alias in unnest.") 3348 3349 alias.set("columns", [alias.this]) 3350 alias.set("this", None) 3351 3352 columns = 
alias.args.get("columns") or [] 3353 if offset and len(expressions) < len(columns): 3354 offset = columns.pop() 3355 3356 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3357 self._match(TokenType.ALIAS) 3358 offset = self._parse_id_var( 3359 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3360 ) or exp.to_identifier("offset") 3361 3362 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3363 3364 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3365 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3366 if not is_derived and not self._match_text_seq("VALUES"): 3367 return None 3368 3369 expressions = self._parse_csv(self._parse_value) 3370 alias = self._parse_table_alias() 3371 3372 if is_derived: 3373 self._match_r_paren() 3374 3375 return self.expression( 3376 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3377 ) 3378 3379 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3380 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3381 as_modifier and self._match_text_seq("USING", "SAMPLE") 3382 ): 3383 return None 3384 3385 bucket_numerator = None 3386 bucket_denominator = None 3387 bucket_field = None 3388 percent = None 3389 size = None 3390 seed = None 3391 3392 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3393 matched_l_paren = self._match(TokenType.L_PAREN) 3394 3395 if self.TABLESAMPLE_CSV: 3396 num = None 3397 expressions = self._parse_csv(self._parse_primary) 3398 else: 3399 expressions = None 3400 num = ( 3401 self._parse_factor() 3402 if self._match(TokenType.NUMBER, advance=False) 3403 else self._parse_primary() or self._parse_placeholder() 3404 ) 3405 3406 if self._match_text_seq("BUCKET"): 3407 bucket_numerator = self._parse_number() 3408 self._match_text_seq("OUT", "OF") 3409 bucket_denominator = self._parse_number() 3410 self._match(TokenType.ON) 3411 bucket_field = self._parse_field() 3412 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3413 percent = num 3414 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3415 size = num 3416 else: 3417 percent = num 3418 3419 if matched_l_paren: 3420 self._match_r_paren() 3421 3422 if self._match(TokenType.L_PAREN): 3423 method = self._parse_var(upper=True) 3424 seed = self._match(TokenType.COMMA) and self._parse_number() 3425 self._match_r_paren() 3426 elif self._match_texts(("SEED", "REPEATABLE")): 3427 seed = self._parse_wrapped(self._parse_number) 3428 3429 if not method and self.DEFAULT_SAMPLING_METHOD: 3430 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3431 3432 return self.expression( 3433 exp.TableSample, 3434 expressions=expressions, 3435 method=method, 3436 bucket_numerator=bucket_numerator, 3437 bucket_denominator=bucket_denominator, 3438 bucket_field=bucket_field, 3439 percent=percent, 3440 size=size, 3441 seed=seed, 3442 ) 3443 3444 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3445 return list(iter(self._parse_pivot, None)) or None 3446 3447 def _parse_joins(self) -> t.Iterator[exp.Join]: 3448 return iter(self._parse_join, None) 3449 3450 # https://duckdb.org/docs/sql/statements/pivot 3451 def _parse_simplified_pivot(self) -> exp.Pivot: 3452 def _parse_on() -> t.Optional[exp.Expression]: 3453 this = self._parse_bitwise() 3454 return self._parse_in(this) if self._match(TokenType.IN) else this 3455 3456 this = self._parse_table() 3457 expressions = 
self._match(TokenType.ON) and self._parse_csv(_parse_on) 3458 using = self._match(TokenType.USING) and self._parse_csv( 3459 lambda: self._parse_alias(self._parse_function()) 3460 ) 3461 group = self._parse_group() 3462 return self.expression( 3463 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3464 ) 3465 3466 def _parse_pivot_in(self) -> exp.In: 3467 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3468 this = self._parse_conjunction() 3469 3470 self._match(TokenType.ALIAS) 3471 alias = self._parse_field() 3472 if alias: 3473 return self.expression(exp.PivotAlias, this=this, alias=alias) 3474 3475 return this 3476 3477 value = self._parse_column() 3478 3479 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3480 self.raise_error("Expecting IN (") 3481 3482 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3483 3484 self._match_r_paren() 3485 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3486 3487 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3488 index = self._index 3489 include_nulls = None 3490 3491 if self._match(TokenType.PIVOT): 3492 unpivot = False 3493 elif self._match(TokenType.UNPIVOT): 3494 unpivot = True 3495 3496 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3497 if self._match_text_seq("INCLUDE", "NULLS"): 3498 include_nulls = True 3499 elif self._match_text_seq("EXCLUDE", "NULLS"): 3500 include_nulls = False 3501 else: 3502 return None 3503 3504 expressions = [] 3505 3506 if not self._match(TokenType.L_PAREN): 3507 self._retreat(index) 3508 return None 3509 3510 if unpivot: 3511 expressions = self._parse_csv(self._parse_column) 3512 else: 3513 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3514 3515 if not expressions: 3516 self.raise_error("Failed to parse PIVOT's aggregation list") 3517 3518 if not self._match(TokenType.FOR): 3519 self.raise_error("Expecting FOR") 3520 3521 field = self._parse_pivot_in() 3522 3523 self._match_r_paren() 3524 3525 pivot = self.expression( 3526 exp.Pivot, 3527 expressions=expressions, 3528 field=field, 3529 unpivot=unpivot, 3530 include_nulls=include_nulls, 3531 ) 3532 3533 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3534 pivot.set("alias", self._parse_table_alias()) 3535 3536 if not unpivot: 3537 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3538 3539 columns: t.List[exp.Expression] = [] 3540 for fld in pivot.args["field"].expressions: 3541 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3542 for name in names: 3543 if self.PREFIXED_PIVOT_COLUMNS: 3544 name = f"{name}_{field_name}" if name else field_name 3545 else: 3546 name = f"{field_name}_{name}" if name else field_name 3547 3548 columns.append(exp.to_identifier(name)) 3549 3550 pivot.set("columns", columns) 3551 3552 return pivot 3553 3554 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3555 return [agg.alias for agg in aggregations] 3556 3557 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3558 if not skip_where_token and not self._match(TokenType.PREWHERE): 3559 return None 3560 3561 return self.expression( 3562 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3563 ) 3564 3565 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3566 if not skip_where_token and not 
self._match(TokenType.WHERE): 3567 return None 3568 3569 return self.expression( 3570 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3571 ) 3572 3573 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3574 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3575 return None 3576 3577 elements: t.Dict[str, t.Any] = defaultdict(list) 3578 3579 if self._match(TokenType.ALL): 3580 elements["all"] = True 3581 elif self._match(TokenType.DISTINCT): 3582 elements["all"] = False 3583 3584 while True: 3585 expressions = self._parse_csv( 3586 lambda: None 3587 if self._match(TokenType.ROLLUP, advance=False) 3588 else self._parse_conjunction() 3589 ) 3590 if expressions: 3591 elements["expressions"].extend(expressions) 3592 3593 grouping_sets = self._parse_grouping_sets() 3594 if grouping_sets: 3595 elements["grouping_sets"].extend(grouping_sets) 3596 3597 rollup = None 3598 cube = None 3599 totals = None 3600 3601 index = self._index 3602 with_ = self._match(TokenType.WITH) 3603 if self._match(TokenType.ROLLUP): 3604 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3605 elements["rollup"].extend(ensure_list(rollup)) 3606 3607 if self._match(TokenType.CUBE): 3608 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3609 elements["cube"].extend(ensure_list(cube)) 3610 3611 if self._match_text_seq("TOTALS"): 3612 totals = True 3613 elements["totals"] = True # type: ignore 3614 3615 if not (grouping_sets or rollup or cube or totals): 3616 if with_: 3617 self._retreat(index) 3618 break 3619 3620 return self.expression(exp.Group, **elements) # type: ignore 3621 3622 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3623 if not self._match(TokenType.GROUPING_SETS): 3624 return None 3625 3626 return self._parse_wrapped_csv(self._parse_grouping_set) 3627 3628 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3629 if self._match(TokenType.L_PAREN): 3630 grouping_set = self._parse_csv(self._parse_column) 3631 self._match_r_paren() 3632 return self.expression(exp.Tuple, expressions=grouping_set) 3633 3634 return self._parse_column() 3635 3636 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3637 if not skip_having_token and not self._match(TokenType.HAVING): 3638 return None 3639 return self.expression(exp.Having, this=self._parse_conjunction()) 3640 3641 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3642 if not self._match(TokenType.QUALIFY): 3643 return None 3644 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3645 3646 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3647 if skip_start_token: 3648 start = None 3649 elif self._match(TokenType.START_WITH): 3650 start = self._parse_conjunction() 3651 else: 3652 return None 3653 3654 self._match(TokenType.CONNECT_BY) 3655 nocycle = self._match_text_seq("NOCYCLE") 3656 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3657 exp.Prior, this=self._parse_bitwise() 3658 ) 3659 connect = self._parse_conjunction() 3660 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3661 3662 if not start and self._match(TokenType.START_WITH): 3663 start = self._parse_conjunction() 3664 3665 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3666 3667 def _parse_name_as_expression(self) -> exp.Alias: 3668 return self.expression( 3669 exp.Alias, 3670 alias=self._parse_id_var(any_token=True), 3671 
this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3672 ) 3673 3674 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3675 if self._match_text_seq("INTERPOLATE"): 3676 return self._parse_wrapped_csv(self._parse_name_as_expression) 3677 return None 3678 3679 def _parse_order( 3680 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3681 ) -> t.Optional[exp.Expression]: 3682 siblings = None 3683 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3684 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3685 return this 3686 3687 siblings = True 3688 3689 return self.expression( 3690 exp.Order, 3691 this=this, 3692 expressions=self._parse_csv(self._parse_ordered), 3693 interpolate=self._parse_interpolate(), 3694 siblings=siblings, 3695 ) 3696 3697 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3698 if not self._match(token): 3699 return None 3700 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3701 3702 def _parse_ordered( 3703 self, parse_method: t.Optional[t.Callable] = None 3704 ) -> t.Optional[exp.Ordered]: 3705 this = parse_method() if parse_method else self._parse_conjunction() 3706 if not this: 3707 return None 3708 3709 asc = self._match(TokenType.ASC) 3710 desc = self._match(TokenType.DESC) or (asc and False) 3711 3712 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3713 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3714 3715 nulls_first = is_nulls_first or False 3716 explicitly_null_ordered = is_nulls_first or is_nulls_last 3717 3718 if ( 3719 not explicitly_null_ordered 3720 and ( 3721 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3722 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3723 ) 3724 and self.dialect.NULL_ORDERING != "nulls_are_last" 3725 ): 3726 nulls_first = True 3727 3728 if self._match_text_seq("WITH", "FILL"): 3729 with_fill = self.expression( 3730 exp.WithFill, 3731 **{ # type: ignore 3732 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3733 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3734 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3735 }, 3736 ) 3737 else: 3738 with_fill = None 3739 3740 return self.expression( 3741 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3742 ) 3743 3744 def _parse_limit( 3745 self, 3746 this: t.Optional[exp.Expression] = None, 3747 top: bool = False, 3748 skip_limit_token: bool = False, 3749 ) -> t.Optional[exp.Expression]: 3750 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3751 comments = self._prev_comments 3752 if top: 3753 limit_paren = self._match(TokenType.L_PAREN) 3754 expression = self._parse_term() if limit_paren else self._parse_number() 3755 3756 if limit_paren: 3757 self._match_r_paren() 3758 else: 3759 expression = self._parse_term() 3760 3761 if self._match(TokenType.COMMA): 3762 offset = expression 3763 expression = self._parse_term() 3764 else: 3765 offset = None 3766 3767 limit_exp = self.expression( 3768 exp.Limit, 3769 this=this, 3770 expression=expression, 3771 offset=offset, 3772 comments=comments, 3773 expressions=self._parse_limit_by(), 3774 ) 3775 3776 return limit_exp 3777 3778 if self._match(TokenType.FETCH): 3779 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3780 direction = self._prev.text.upper() if direction else "FIRST" 3781 3782 count = self._parse_field(tokens=self.FETCH_TOKENS) 3783 percent = 
self._match(TokenType.PERCENT) 3784 3785 self._match_set((TokenType.ROW, TokenType.ROWS)) 3786 3787 only = self._match_text_seq("ONLY") 3788 with_ties = self._match_text_seq("WITH", "TIES") 3789 3790 if only and with_ties: 3791 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3792 3793 return self.expression( 3794 exp.Fetch, 3795 direction=direction, 3796 count=count, 3797 percent=percent, 3798 with_ties=with_ties, 3799 ) 3800 3801 return this 3802 3803 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3804 if not self._match(TokenType.OFFSET): 3805 return this 3806 3807 count = self._parse_term() 3808 self._match_set((TokenType.ROW, TokenType.ROWS)) 3809 3810 return self.expression( 3811 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3812 ) 3813 3814 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3815 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3816 3817 def _parse_locks(self) -> t.List[exp.Lock]: 3818 locks = [] 3819 while True: 3820 if self._match_text_seq("FOR", "UPDATE"): 3821 update = True 3822 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3823 "LOCK", "IN", "SHARE", "MODE" 3824 ): 3825 update = False 3826 else: 3827 break 3828 3829 expressions = None 3830 if self._match_text_seq("OF"): 3831 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3832 3833 wait: t.Optional[bool | exp.Expression] = None 3834 if self._match_text_seq("NOWAIT"): 3835 wait = True 3836 elif self._match_text_seq("WAIT"): 3837 wait = self._parse_primary() 3838 elif self._match_text_seq("SKIP", "LOCKED"): 3839 wait = False 3840 3841 locks.append( 3842 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3843 ) 3844 3845 return locks 3846 3847 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3848 while this and self._match_set(self.SET_OPERATIONS): 3849 token_type = self._prev.token_type 3850 3851 if token_type == TokenType.UNION: 3852 operation = exp.Union 3853 elif token_type == TokenType.EXCEPT: 3854 operation = exp.Except 3855 else: 3856 operation = exp.Intersect 3857 3858 comments = self._prev.comments 3859 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3860 by_name = self._match_text_seq("BY", "NAME") 3861 expression = self._parse_select(nested=True, parse_set_operation=False) 3862 3863 this = self.expression( 3864 operation, 3865 comments=comments, 3866 this=this, 3867 distinct=distinct, 3868 by_name=by_name, 3869 expression=expression, 3870 ) 3871 3872 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3873 expression = this.expression 3874 3875 if expression: 3876 for arg in self.UNION_MODIFIERS: 3877 expr = expression.args.get(arg) 3878 if expr: 3879 this.set(arg, expr.pop()) 3880 3881 return this 3882 3883 def _parse_expression(self) -> t.Optional[exp.Expression]: 3884 return self._parse_alias(self._parse_conjunction()) 3885 3886 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3887 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3888 3889 def _parse_equality(self) -> t.Optional[exp.Expression]: 3890 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3891 3892 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3893 return self._parse_tokens(self._parse_range, self.COMPARISON) 3894 3895 def _parse_range(self, this: t.Optional[exp.Expression] = None) 
-> t.Optional[exp.Expression]: 3896 this = this or self._parse_bitwise() 3897 negate = self._match(TokenType.NOT) 3898 3899 if self._match_set(self.RANGE_PARSERS): 3900 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3901 if not expression: 3902 return this 3903 3904 this = expression 3905 elif self._match(TokenType.ISNULL): 3906 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3907 3908 # Postgres supports ISNULL and NOTNULL for conditions. 3909 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3910 if self._match(TokenType.NOTNULL): 3911 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3912 this = self.expression(exp.Not, this=this) 3913 3914 if negate: 3915 this = self.expression(exp.Not, this=this) 3916 3917 if self._match(TokenType.IS): 3918 this = self._parse_is(this) 3919 3920 return this 3921 3922 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3923 index = self._index - 1 3924 negate = self._match(TokenType.NOT) 3925 3926 if self._match_text_seq("DISTINCT", "FROM"): 3927 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3928 return self.expression(klass, this=this, expression=self._parse_bitwise()) 3929 3930 expression = self._parse_null() or self._parse_boolean() 3931 if not expression: 3932 self._retreat(index) 3933 return None 3934 3935 this = self.expression(exp.Is, this=this, expression=expression) 3936 return self.expression(exp.Not, this=this) if negate else this 3937 3938 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3939 unnest = self._parse_unnest(with_alias=False) 3940 if unnest: 3941 this = self.expression(exp.In, this=this, unnest=unnest) 3942 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3943 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3944 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3945 3946 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 3947 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 3948 else: 3949 this = self.expression(exp.In, this=this, expressions=expressions) 3950 3951 if matched_l_paren: 3952 self._match_r_paren(this) 3953 elif not self._match(TokenType.R_BRACKET, expression=this): 3954 self.raise_error("Expecting ]") 3955 else: 3956 this = self.expression(exp.In, this=this, field=self._parse_field()) 3957 3958 return this 3959 3960 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3961 low = self._parse_bitwise() 3962 self._match(TokenType.AND) 3963 high = self._parse_bitwise() 3964 return self.expression(exp.Between, this=this, low=low, high=high) 3965 3966 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3967 if not self._match(TokenType.ESCAPE): 3968 return this 3969 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3970 3971 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3972 index = self._index 3973 3974 if not self._match(TokenType.INTERVAL) and match_interval: 3975 return None 3976 3977 if self._match(TokenType.STRING, advance=False): 3978 this = self._parse_primary() 3979 else: 3980 this = self._parse_term() 3981 3982 if not this or ( 3983 isinstance(this, exp.Column) 3984 and not this.table 3985 and not this.this.quoted 3986 and this.name.upper() == "IS" 3987 ): 3988 self._retreat(index) 3989 return None 3990 3991 unit = 
self._parse_function() or ( 3992 not self._match(TokenType.ALIAS, advance=False) 3993 and self._parse_var(any_token=True, upper=True) 3994 ) 3995 3996 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3997 # each INTERVAL expression into this canonical form so it's easy to transpile 3998 if this and this.is_number: 3999 this = exp.Literal.string(this.name) 4000 elif this and this.is_string: 4001 parts = this.name.split() 4002 4003 if len(parts) == 2: 4004 if unit: 4005 # This is not actually a unit, it's something else (e.g. a "window side") 4006 unit = None 4007 self._retreat(self._index - 1) 4008 4009 this = exp.Literal.string(parts[0]) 4010 unit = self.expression(exp.Var, this=parts[1].upper()) 4011 4012 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4013 unit = self.expression( 4014 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4015 ) 4016 4017 return self.expression(exp.Interval, this=this, unit=unit) 4018 4019 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4020 this = self._parse_term() 4021 4022 while True: 4023 if self._match_set(self.BITWISE): 4024 this = self.expression( 4025 self.BITWISE[self._prev.token_type], 4026 this=this, 4027 expression=self._parse_term(), 4028 ) 4029 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4030 this = self.expression( 4031 exp.DPipe, 4032 this=this, 4033 expression=self._parse_term(), 4034 safe=not self.dialect.STRICT_STRING_CONCAT, 4035 ) 4036 elif self._match(TokenType.DQMARK): 4037 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4038 elif self._match_pair(TokenType.LT, TokenType.LT): 4039 this = self.expression( 4040 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4041 ) 4042 elif self._match_pair(TokenType.GT, TokenType.GT): 4043 this = self.expression( 4044 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4045 ) 4046 else: 4047 break 4048 4049 return this 4050 4051 def _parse_term(self) -> t.Optional[exp.Expression]: 4052 return self._parse_tokens(self._parse_factor, self.TERM) 4053 4054 def _parse_factor(self) -> t.Optional[exp.Expression]: 4055 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4056 this = parse_method() 4057 4058 while self._match_set(self.FACTOR): 4059 this = self.expression( 4060 self.FACTOR[self._prev.token_type], 4061 this=this, 4062 comments=self._prev_comments, 4063 expression=parse_method(), 4064 ) 4065 if isinstance(this, exp.Div): 4066 this.args["typed"] = self.dialect.TYPED_DIVISION 4067 this.args["safe"] = self.dialect.SAFE_DIVISION 4068 4069 return this 4070 4071 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4072 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4073 4074 def _parse_unary(self) -> t.Optional[exp.Expression]: 4075 if self._match_set(self.UNARY_PARSERS): 4076 return self.UNARY_PARSERS[self._prev.token_type](self) 4077 return self._parse_at_time_zone(self._parse_type()) 4078 4079 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 4080 interval = parse_interval and self._parse_interval() 4081 if interval: 4082 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4083 while True: 4084 index = self._index 4085 self._match(TokenType.PLUS) 4086 4087 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4088 self._retreat(index) 4089 break 4090 4091 interval = self.expression( # type: ignore 4092 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4093 ) 4094 4095 return interval 4096 4097 index = self._index 4098 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4099 this = self._parse_column() 4100 4101 if data_type: 4102 if isinstance(this, exp.Literal): 4103 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4104 if parser: 4105 return parser(self, this, data_type) 4106 return self.expression(exp.Cast, this=this, to=data_type) 4107 if not data_type.expressions: 4108 self._retreat(index) 4109 return self._parse_column() 4110 return self._parse_column_ops(data_type) 4111 4112 return this and self._parse_column_ops(this) 4113 4114 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4115 this = self._parse_type() 4116 if not this: 4117 return None 4118 4119 if isinstance(this, exp.Column) and not this.table: 4120 this = exp.var(this.name.upper()) 4121 4122 return self.expression( 4123 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4124 ) 4125 4126 def _parse_types( 4127 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4128 ) -> t.Optional[exp.Expression]: 4129 index = self._index 4130 4131 this: t.Optional[exp.Expression] = None 4132 prefix = self._match_text_seq("SYSUDTLIB", ".") 4133 4134 if not self._match_set(self.TYPE_TOKENS): 4135 identifier = allow_identifiers and self._parse_id_var( 4136 any_token=False, tokens=(TokenType.VAR,) 4137 ) 4138 if identifier: 4139 tokens = self.dialect.tokenize(identifier.name) 4140 4141 if len(tokens) != 1: 4142 self.raise_error("Unexpected identifier", self._prev) 4143 4144 if tokens[0].token_type in self.TYPE_TOKENS: 4145 self._prev = tokens[0] 4146 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4147 type_name = identifier.name 4148 4149 while self._match(TokenType.DOT): 4150 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4151 4152 this = exp.DataType.build(type_name, udt=True) 4153 else: 4154 self._retreat(self._index - 1) 4155 return None 4156 else: 4157 return None 4158 4159 type_token = self._prev.token_type 4160 4161 if type_token == TokenType.PSEUDO_TYPE: 4162 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4163 4164 if type_token == TokenType.OBJECT_IDENTIFIER: 4165 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4166 4167 nested = type_token in self.NESTED_TYPE_TOKENS 4168 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4169 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4170 expressions = None 4171 maybe_func = False 4172 4173 if self._match(TokenType.L_PAREN): 4174 if is_struct: 4175 expressions = self._parse_csv(self._parse_struct_types) 4176 elif nested: 4177 expressions = self._parse_csv( 4178 lambda: self._parse_types( 4179 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4180 ) 4181 ) 4182 elif type_token in self.ENUM_TYPE_TOKENS: 4183 expressions = self._parse_csv(self._parse_equality) 4184 elif is_aggregate: 4185 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4186 any_token=False, tokens=(TokenType.VAR,) 4187 ) 4188 if not func_or_ident or not self._match(TokenType.COMMA): 4189 
return None 4190 expressions = self._parse_csv( 4191 lambda: self._parse_types( 4192 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4193 ) 4194 ) 4195 expressions.insert(0, func_or_ident) 4196 else: 4197 expressions = self._parse_csv(self._parse_type_size) 4198 4199 if not expressions or not self._match(TokenType.R_PAREN): 4200 self._retreat(index) 4201 return None 4202 4203 maybe_func = True 4204 4205 values: t.Optional[t.List[exp.Expression]] = None 4206 4207 if nested and self._match(TokenType.LT): 4208 if is_struct: 4209 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4210 else: 4211 expressions = self._parse_csv( 4212 lambda: self._parse_types( 4213 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4214 ) 4215 ) 4216 4217 if not self._match(TokenType.GT): 4218 self.raise_error("Expecting >") 4219 4220 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4221 values = self._parse_csv(self._parse_conjunction) 4222 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4223 4224 if type_token in self.TIMESTAMPS: 4225 if self._match_text_seq("WITH", "TIME", "ZONE"): 4226 maybe_func = False 4227 tz_type = ( 4228 exp.DataType.Type.TIMETZ 4229 if type_token in self.TIMES 4230 else exp.DataType.Type.TIMESTAMPTZ 4231 ) 4232 this = exp.DataType(this=tz_type, expressions=expressions) 4233 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4234 maybe_func = False 4235 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4236 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4237 maybe_func = False 4238 elif type_token == TokenType.INTERVAL: 4239 unit = self._parse_var(upper=True) 4240 if unit: 4241 if self._match_text_seq("TO"): 4242 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4243 4244 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4245 else: 4246 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4247 4248 if maybe_func and check_func: 4249 index2 = self._index 4250 peek = self._parse_string() 4251 4252 if not peek: 4253 self._retreat(index) 4254 return None 4255 4256 self._retreat(index2) 4257 4258 if not this: 4259 if self._match_text_seq("UNSIGNED"): 4260 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4261 if not unsigned_type_token: 4262 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4263 4264 type_token = unsigned_type_token or type_token 4265 4266 this = exp.DataType( 4267 this=exp.DataType.Type[type_token.value], 4268 expressions=expressions, 4269 nested=nested, 4270 values=values, 4271 prefix=prefix, 4272 ) 4273 elif expressions: 4274 this.set("expressions", expressions) 4275 4276 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4277 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4278 4279 if self.TYPE_CONVERTER and isinstance(this.this, exp.DataType.Type): 4280 converter = self.TYPE_CONVERTER.get(this.this) 4281 if converter: 4282 this = converter(t.cast(exp.DataType, this)) 4283 4284 return this 4285 4286 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4287 index = self._index 4288 this = self._parse_type(parse_interval=False) or self._parse_id_var() 4289 self._match(TokenType.COLON) 4290 column_def = self._parse_column_def(this) 4291 4292 if type_required and ( 4293 (isinstance(this, exp.Column) and this.this is 
column_def) or this is column_def 4294 ): 4295 self._retreat(index) 4296 return self._parse_types() 4297 4298 return column_def 4299 4300 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4301 if not self._match_text_seq("AT", "TIME", "ZONE"): 4302 return this 4303 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4304 4305 def _parse_column(self) -> t.Optional[exp.Expression]: 4306 this = self._parse_column_reference() 4307 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4308 4309 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4310 this = self._parse_field() 4311 if ( 4312 not this 4313 and self._match(TokenType.VALUES, advance=False) 4314 and self.VALUES_FOLLOWED_BY_PAREN 4315 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4316 ): 4317 this = self._parse_id_var() 4318 4319 if isinstance(this, exp.Identifier): 4320 # We bubble up comments from the Identifier to the Column 4321 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4322 4323 return this 4324 4325 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4326 this = self._parse_bracket(this) 4327 4328 while self._match_set(self.COLUMN_OPERATORS): 4329 op_token = self._prev.token_type 4330 op = self.COLUMN_OPERATORS.get(op_token) 4331 4332 if op_token == TokenType.DCOLON: 4333 field = self._parse_types() 4334 if not field: 4335 self.raise_error("Expected type") 4336 elif op and self._curr: 4337 field = self._parse_column_reference() 4338 else: 4339 field = self._parse_field(any_token=True, anonymous_func=True) 4340 4341 if isinstance(field, exp.Func) and this: 4342 # bigquery allows function calls like x.y.count(...) 4343 # SAFE.SUBSTR(...) 
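# (illustrative note) e.g. in SAFE.SUBSTR(x, 1) the SAFE prefix is first parsed as a
# Column; the replace_tree call below rewrites such Column nodes into their identifier
# and Dot parts so the trailing function call can be attached via exp.Dot.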
4344 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4345 this = exp.replace_tree( 4346 this, 4347 lambda n: ( 4348 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4349 if n.table 4350 else n.this 4351 ) 4352 if isinstance(n, exp.Column) 4353 else n, 4354 ) 4355 4356 if op: 4357 this = op(self, this, field) 4358 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4359 this = self.expression( 4360 exp.Column, 4361 this=field, 4362 table=this.this, 4363 db=this.args.get("table"), 4364 catalog=this.args.get("db"), 4365 ) 4366 else: 4367 this = self.expression(exp.Dot, this=this, expression=field) 4368 this = self._parse_bracket(this) 4369 return this 4370 4371 def _parse_primary(self) -> t.Optional[exp.Expression]: 4372 if self._match_set(self.PRIMARY_PARSERS): 4373 token_type = self._prev.token_type 4374 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4375 4376 if token_type == TokenType.STRING: 4377 expressions = [primary] 4378 while self._match(TokenType.STRING): 4379 expressions.append(exp.Literal.string(self._prev.text)) 4380 4381 if len(expressions) > 1: 4382 return self.expression(exp.Concat, expressions=expressions) 4383 4384 return primary 4385 4386 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4387 return exp.Literal.number(f"0.{self._prev.text}") 4388 4389 if self._match(TokenType.L_PAREN): 4390 comments = self._prev_comments 4391 query = self._parse_select() 4392 4393 if query: 4394 expressions = [query] 4395 else: 4396 expressions = self._parse_expressions() 4397 4398 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4399 4400 if not this and self._match(TokenType.R_PAREN, advance=False): 4401 this = self.expression(exp.Tuple) 4402 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4403 this = self._parse_subquery(this=this, parse_alias=False) 4404 elif isinstance(this, exp.Subquery): 4405 this = self._parse_subquery( 4406 this=self._parse_set_operations(this), parse_alias=False 4407 ) 4408 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4409 this = self.expression(exp.Tuple, expressions=expressions) 4410 else: 4411 this = self.expression(exp.Paren, this=this) 4412 4413 if this: 4414 this.add_comments(comments) 4415 4416 self._match_r_paren(expression=this) 4417 return this 4418 4419 return None 4420 4421 def _parse_field( 4422 self, 4423 any_token: bool = False, 4424 tokens: t.Optional[t.Collection[TokenType]] = None, 4425 anonymous_func: bool = False, 4426 ) -> t.Optional[exp.Expression]: 4427 if anonymous_func: 4428 field = ( 4429 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4430 or self._parse_primary() 4431 ) 4432 else: 4433 field = self._parse_primary() or self._parse_function( 4434 anonymous=anonymous_func, any_token=any_token 4435 ) 4436 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4437 4438 def _parse_function( 4439 self, 4440 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4441 anonymous: bool = False, 4442 optional_parens: bool = True, 4443 any_token: bool = False, 4444 ) -> t.Optional[exp.Expression]: 4445 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4446 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4447 fn_syntax = False 4448 if ( 4449 self._match(TokenType.L_BRACE, advance=False) 4450 and self._next 4451 and self._next.text.upper() == "FN" 4452 ): 4453 self._advance(2) 4454 fn_syntax = True 4455 4456 func 
= self._parse_function_call( 4457 functions=functions, 4458 anonymous=anonymous, 4459 optional_parens=optional_parens, 4460 any_token=any_token, 4461 ) 4462 4463 if fn_syntax: 4464 self._match(TokenType.R_BRACE) 4465 4466 return func 4467 4468 def _parse_function_call( 4469 self, 4470 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4471 anonymous: bool = False, 4472 optional_parens: bool = True, 4473 any_token: bool = False, 4474 ) -> t.Optional[exp.Expression]: 4475 if not self._curr: 4476 return None 4477 4478 comments = self._curr.comments 4479 token_type = self._curr.token_type 4480 this = self._curr.text 4481 upper = this.upper() 4482 4483 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4484 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4485 self._advance() 4486 return self._parse_window(parser(self)) 4487 4488 if not self._next or self._next.token_type != TokenType.L_PAREN: 4489 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4490 self._advance() 4491 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4492 4493 return None 4494 4495 if any_token: 4496 if token_type in self.RESERVED_TOKENS: 4497 return None 4498 elif token_type not in self.FUNC_TOKENS: 4499 return None 4500 4501 self._advance(2) 4502 4503 parser = self.FUNCTION_PARSERS.get(upper) 4504 if parser and not anonymous: 4505 this = parser(self) 4506 else: 4507 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4508 4509 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4510 this = self.expression(subquery_predicate, this=self._parse_select()) 4511 self._match_r_paren() 4512 return this 4513 4514 if functions is None: 4515 functions = self.FUNCTIONS 4516 4517 function = functions.get(upper) 4518 4519 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4520 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4521 4522 if alias: 4523 args = self._kv_to_prop_eq(args) 4524 4525 if function and not anonymous: 4526 if "dialect" in function.__code__.co_varnames: 4527 func = function(args, dialect=self.dialect) 4528 else: 4529 func = function(args) 4530 4531 func = self.validate_expression(func, args) 4532 if not self.dialect.NORMALIZE_FUNCTIONS: 4533 func.meta["name"] = this 4534 4535 this = func 4536 else: 4537 if token_type == TokenType.IDENTIFIER: 4538 this = exp.Identifier(this=this, quoted=True) 4539 this = self.expression(exp.Anonymous, this=this, expressions=args) 4540 4541 if isinstance(this, exp.Expression): 4542 this.add_comments(comments) 4543 4544 self._match_r_paren(this) 4545 return self._parse_window(this) 4546 4547 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4548 transformed = [] 4549 4550 for e in expressions: 4551 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4552 if isinstance(e, exp.Alias): 4553 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4554 4555 if not isinstance(e, exp.PropertyEQ): 4556 e = self.expression( 4557 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4558 ) 4559 4560 if isinstance(e.this, exp.Column): 4561 e.this.replace(e.this.this) 4562 4563 transformed.append(e) 4564 4565 return transformed 4566 4567 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4568 return self._parse_column_def(self._parse_id_var()) 4569 4570 def _parse_user_defined_function( 4571 self, kind: t.Optional[TokenType] = None 4572 ) -> t.Optional[exp.Expression]: 4573 this = 
self._parse_id_var() 4574 4575 while self._match(TokenType.DOT): 4576 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4577 4578 if not self._match(TokenType.L_PAREN): 4579 return this 4580 4581 expressions = self._parse_csv(self._parse_function_parameter) 4582 self._match_r_paren() 4583 return self.expression( 4584 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4585 ) 4586 4587 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4588 literal = self._parse_primary() 4589 if literal: 4590 return self.expression(exp.Introducer, this=token.text, expression=literal) 4591 4592 return self.expression(exp.Identifier, this=token.text) 4593 4594 def _parse_session_parameter(self) -> exp.SessionParameter: 4595 kind = None 4596 this = self._parse_id_var() or self._parse_primary() 4597 4598 if this and self._match(TokenType.DOT): 4599 kind = this.name 4600 this = self._parse_var() or self._parse_primary() 4601 4602 return self.expression(exp.SessionParameter, this=this, kind=kind) 4603 4604 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4605 index = self._index 4606 4607 if self._match(TokenType.L_PAREN): 4608 expressions = t.cast( 4609 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4610 ) 4611 4612 if not self._match(TokenType.R_PAREN): 4613 self._retreat(index) 4614 else: 4615 expressions = [self._parse_id_var()] 4616 4617 if self._match_set(self.LAMBDAS): 4618 return self.LAMBDAS[self._prev.token_type](self, expressions) 4619 4620 self._retreat(index) 4621 4622 this: t.Optional[exp.Expression] 4623 4624 if self._match(TokenType.DISTINCT): 4625 this = self.expression( 4626 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4627 ) 4628 else: 4629 this = self._parse_select_or_expression(alias=alias) 4630 4631 return self._parse_limit( 4632 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4633 ) 4634 4635 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4636 index = self._index 4637 if not self._match(TokenType.L_PAREN): 4638 return this 4639 4640 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 4641 # expr can be of both types 4642 if self._match_set(self.SELECT_START_TOKENS): 4643 self._retreat(index) 4644 return this 4645 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4646 self._match_r_paren() 4647 return self.expression(exp.Schema, this=this, expressions=args) 4648 4649 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4650 return self._parse_column_def(self._parse_field(any_token=True)) 4651 4652 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4653 # column defs are not really columns, they're identifiers 4654 if isinstance(this, exp.Column): 4655 this = this.this 4656 4657 kind = self._parse_types(schema=True) 4658 4659 if self._match_text_seq("FOR", "ORDINALITY"): 4660 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4661 4662 constraints: t.List[exp.Expression] = [] 4663 4664 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4665 ("ALIAS", "MATERIALIZED") 4666 ): 4667 persisted = self._prev.text.upper() == "MATERIALIZED" 4668 constraints.append( 4669 self.expression( 4670 exp.ComputedColumnConstraint, 4671 this=self._parse_conjunction(), 4672 persisted=persisted or self._match_text_seq("PERSISTED"), 4673 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4674 ) 4675 ) 4676 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4677 self._match(TokenType.ALIAS) 4678 constraints.append( 4679 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4680 ) 4681 4682 while True: 4683 constraint = self._parse_column_constraint() 4684 if not constraint: 4685 break 4686 constraints.append(constraint) 4687 4688 if not kind and not constraints: 4689 return this 4690 4691 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4692 4693 def _parse_auto_increment( 4694 self, 4695 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4696 start = None 4697 increment = None 4698 4699 if self._match(TokenType.L_PAREN, advance=False): 4700 args = self._parse_wrapped_csv(self._parse_bitwise) 4701 start = seq_get(args, 0) 4702 increment = seq_get(args, 1) 4703 elif self._match_text_seq("START"): 4704 start = self._parse_bitwise() 4705 self._match_text_seq("INCREMENT") 4706 increment = self._parse_bitwise() 4707 4708 if start and increment: 4709 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4710 4711 return exp.AutoIncrementColumnConstraint() 4712 4713 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4714 if not self._match_text_seq("REFRESH"): 4715 self._retreat(self._index - 1) 4716 return None 4717 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4718 4719 def _parse_compress(self) -> exp.CompressColumnConstraint: 4720 if self._match(TokenType.L_PAREN, advance=False): 4721 return self.expression( 4722 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4723 ) 4724 4725 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4726 4727 def _parse_generated_as_identity( 4728 self, 4729 ) -> ( 4730 exp.GeneratedAsIdentityColumnConstraint 4731 | exp.ComputedColumnConstraint 4732 | exp.GeneratedAsRowColumnConstraint 4733 ): 4734 if self._match_text_seq("BY", "DEFAULT"): 4735 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4736 this = self.expression( 4737 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4738 ) 4739 else: 4740 self._match_text_seq("ALWAYS") 4741 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4742 4743 self._match(TokenType.ALIAS) 4744 4745 if self._match_text_seq("ROW"): 4746 start = self._match_text_seq("START") 4747 if not start: 4748 self._match(TokenType.END) 4749 hidden = self._match_text_seq("HIDDEN") 4750 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4751 4752 identity = self._match_text_seq("IDENTITY") 4753 4754 if self._match(TokenType.L_PAREN): 4755 if self._match(TokenType.START_WITH): 4756 this.set("start", self._parse_bitwise()) 4757 if self._match_text_seq("INCREMENT", "BY"): 4758 this.set("increment", self._parse_bitwise()) 4759 if self._match_text_seq("MINVALUE"): 4760 this.set("minvalue", self._parse_bitwise()) 4761 if self._match_text_seq("MAXVALUE"): 4762 this.set("maxvalue", self._parse_bitwise()) 4763 4764 if self._match_text_seq("CYCLE"): 4765 this.set("cycle", True) 4766 elif self._match_text_seq("NO", "CYCLE"): 4767 this.set("cycle", False) 4768 4769 if not identity: 4770 this.set("expression", self._parse_range()) 4771 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4772 args = self._parse_csv(self._parse_bitwise) 4773 this.set("start", seq_get(args, 0)) 4774 this.set("increment", seq_get(args, 1)) 4775 4776 self._match_r_paren() 4777 4778 return this 4779 4780 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4781 self._match_text_seq("LENGTH") 4782 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4783 4784 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4785 if self._match_text_seq("NULL"): 4786 return self.expression(exp.NotNullColumnConstraint) 4787 if self._match_text_seq("CASESPECIFIC"): 4788 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4789 if self._match_text_seq("FOR", "REPLICATION"): 4790 return self.expression(exp.NotForReplicationColumnConstraint) 4791 return None 4792 4793 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4794 if self._match(TokenType.CONSTRAINT): 4795 this = self._parse_id_var() 4796 else: 4797 this = None 4798 4799 if self._match_texts(self.CONSTRAINT_PARSERS): 4800 return self.expression( 4801 exp.ColumnConstraint, 4802 this=this, 4803 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4804 ) 4805 4806 return this 4807 4808 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4809 if not self._match(TokenType.CONSTRAINT): 4810 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4811 4812 return self.expression( 4813 exp.Constraint, 4814 this=self._parse_id_var(), 4815 expressions=self._parse_unnamed_constraints(), 4816 ) 4817 4818 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4819 constraints = [] 4820 while True: 4821 constraint = self._parse_unnamed_constraint() or self._parse_function() 4822 if not constraint: 4823 break 4824 constraints.append(constraint) 4825 4826 return constraints 4827 4828 def _parse_unnamed_constraint( 4829 self, constraints: t.Optional[t.Collection[str]] = None 4830 ) -> t.Optional[exp.Expression]: 4831 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4832 constraints or self.CONSTRAINT_PARSERS 4833 ): 4834 return None 4835 4836 constraint = self._prev.text.upper() 4837 if constraint not in self.CONSTRAINT_PARSERS: 4838 
self.raise_error(f"No parser found for schema constraint {constraint}.") 4839 4840 return self.CONSTRAINT_PARSERS[constraint](self) 4841 4842 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4843 self._match_text_seq("KEY") 4844 return self.expression( 4845 exp.UniqueColumnConstraint, 4846 this=self._parse_schema(self._parse_id_var(any_token=False)), 4847 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4848 on_conflict=self._parse_on_conflict(), 4849 ) 4850 4851 def _parse_key_constraint_options(self) -> t.List[str]: 4852 options = [] 4853 while True: 4854 if not self._curr: 4855 break 4856 4857 if self._match(TokenType.ON): 4858 action = None 4859 on = self._advance_any() and self._prev.text 4860 4861 if self._match_text_seq("NO", "ACTION"): 4862 action = "NO ACTION" 4863 elif self._match_text_seq("CASCADE"): 4864 action = "CASCADE" 4865 elif self._match_text_seq("RESTRICT"): 4866 action = "RESTRICT" 4867 elif self._match_pair(TokenType.SET, TokenType.NULL): 4868 action = "SET NULL" 4869 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4870 action = "SET DEFAULT" 4871 else: 4872 self.raise_error("Invalid key constraint") 4873 4874 options.append(f"ON {on} {action}") 4875 elif self._match_text_seq("NOT", "ENFORCED"): 4876 options.append("NOT ENFORCED") 4877 elif self._match_text_seq("DEFERRABLE"): 4878 options.append("DEFERRABLE") 4879 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4880 options.append("INITIALLY DEFERRED") 4881 elif self._match_text_seq("NORELY"): 4882 options.append("NORELY") 4883 elif self._match_text_seq("MATCH", "FULL"): 4884 options.append("MATCH FULL") 4885 else: 4886 break 4887 4888 return options 4889 4890 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4891 if match and not self._match(TokenType.REFERENCES): 4892 return None 4893 4894 expressions = None 4895 this = self._parse_table(schema=True) 4896 options = self._parse_key_constraint_options() 4897 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4898 4899 def _parse_foreign_key(self) -> exp.ForeignKey: 4900 expressions = self._parse_wrapped_id_vars() 4901 reference = self._parse_references() 4902 options = {} 4903 4904 while self._match(TokenType.ON): 4905 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4906 self.raise_error("Expected DELETE or UPDATE") 4907 4908 kind = self._prev.text.lower() 4909 4910 if self._match_text_seq("NO", "ACTION"): 4911 action = "NO ACTION" 4912 elif self._match(TokenType.SET): 4913 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4914 action = "SET " + self._prev.text.upper() 4915 else: 4916 self._advance() 4917 action = self._prev.text.upper() 4918 4919 options[kind] = action 4920 4921 return self.expression( 4922 exp.ForeignKey, 4923 expressions=expressions, 4924 reference=reference, 4925 **options, # type: ignore 4926 ) 4927 4928 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4929 return self._parse_field() 4930 4931 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4932 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4933 self._retreat(self._index - 1) 4934 return None 4935 4936 id_vars = self._parse_wrapped_id_vars() 4937 return self.expression( 4938 exp.PeriodForSystemTimeConstraint, 4939 this=seq_get(id_vars, 0), 4940 expression=seq_get(id_vars, 1), 4941 ) 4942 4943 def _parse_primary_key( 4944 self, wrapped_optional: bool = False, in_props: bool = False 4945 ) -> 
exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4946 desc = ( 4947 self._match_set((TokenType.ASC, TokenType.DESC)) 4948 and self._prev.token_type == TokenType.DESC 4949 ) 4950 4951 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4952 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4953 4954 expressions = self._parse_wrapped_csv( 4955 self._parse_primary_key_part, optional=wrapped_optional 4956 ) 4957 options = self._parse_key_constraint_options() 4958 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4959 4960 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4961 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4962 4963 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4964 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4965 return this 4966 4967 bracket_kind = self._prev.token_type 4968 expressions = self._parse_csv( 4969 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4970 ) 4971 4972 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 4973 self.raise_error("Expected ]") 4974 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 4975 self.raise_error("Expected }") 4976 4977 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4978 if bracket_kind == TokenType.L_BRACE: 4979 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 4980 elif not this or this.name.upper() == "ARRAY": 4981 this = self.expression(exp.Array, expressions=expressions) 4982 else: 4983 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4984 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4985 4986 self._add_comments(this) 4987 return self._parse_bracket(this) 4988 4989 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4990 if self._match(TokenType.COLON): 4991 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4992 return this 4993 4994 def _parse_case(self) -> t.Optional[exp.Expression]: 4995 ifs = [] 4996 default = None 4997 4998 comments = self._prev_comments 4999 expression = self._parse_conjunction() 5000 5001 while self._match(TokenType.WHEN): 5002 this = self._parse_conjunction() 5003 self._match(TokenType.THEN) 5004 then = self._parse_conjunction() 5005 ifs.append(self.expression(exp.If, this=this, true=then)) 5006 5007 if self._match(TokenType.ELSE): 5008 default = self._parse_conjunction() 5009 5010 if not self._match(TokenType.END): 5011 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5012 default = exp.column("interval") 5013 else: 5014 self.raise_error("Expected END after CASE", self._prev) 5015 5016 return self.expression( 5017 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5018 ) 5019 5020 def _parse_if(self) -> t.Optional[exp.Expression]: 5021 if self._match(TokenType.L_PAREN): 5022 args = self._parse_csv(self._parse_conjunction) 5023 this = self.validate_expression(exp.If.from_arg_list(args), args) 5024 self._match_r_paren() 5025 else: 5026 index = self._index - 1 5027 5028 if self.NO_PAREN_IF_COMMANDS and index == 0: 5029 return self._parse_as_command(self._prev) 5030 5031 condition = self._parse_conjunction() 5032 5033 if not condition: 5034 self._retreat(index) 5035 return None 5036 
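# (illustrative) at this point we are in the parenless variant, e.g.
# IF <condition> THEN <expr> [ELSE <expr>] END, as opposed to the
# function form IF(<args>) handled in the branch above.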
5037 self._match(TokenType.THEN) 5038 true = self._parse_conjunction() 5039 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 5040 self._match(TokenType.END) 5041 this = self.expression(exp.If, this=condition, true=true, false=false) 5042 5043 return this 5044 5045 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5046 if not self._match_text_seq("VALUE", "FOR"): 5047 self._retreat(self._index - 1) 5048 return None 5049 5050 return self.expression( 5051 exp.NextValueFor, 5052 this=self._parse_column(), 5053 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5054 ) 5055 5056 def _parse_extract(self) -> exp.Extract: 5057 this = self._parse_function() or self._parse_var() or self._parse_type() 5058 5059 if self._match(TokenType.FROM): 5060 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5061 5062 if not self._match(TokenType.COMMA): 5063 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5064 5065 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5066 5067 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5068 this = self._parse_conjunction() 5069 5070 if not self._match(TokenType.ALIAS): 5071 if self._match(TokenType.COMMA): 5072 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5073 5074 self.raise_error("Expected AS after CAST") 5075 5076 fmt = None 5077 to = self._parse_types() 5078 5079 if self._match(TokenType.FORMAT): 5080 fmt_string = self._parse_string() 5081 fmt = self._parse_at_time_zone(fmt_string) 5082 5083 if not to: 5084 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5085 if to.this in exp.DataType.TEMPORAL_TYPES: 5086 this = self.expression( 5087 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5088 this=this, 5089 format=exp.Literal.string( 5090 format_time( 5091 fmt_string.this if fmt_string else "", 5092 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5093 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5094 ) 5095 ), 5096 ) 5097 5098 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5099 this.set("zone", fmt.args["zone"]) 5100 return this 5101 elif not to: 5102 self.raise_error("Expected TYPE after CAST") 5103 elif isinstance(to, exp.Identifier): 5104 to = exp.DataType.build(to.name, udt=True) 5105 elif to.this == exp.DataType.Type.CHAR: 5106 if self._match(TokenType.CHARACTER_SET): 5107 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5108 5109 return self.expression( 5110 exp.Cast if strict else exp.TryCast, 5111 this=this, 5112 to=to, 5113 format=fmt, 5114 safe=safe, 5115 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5116 ) 5117 5118 def _parse_string_agg(self) -> exp.Expression: 5119 if self._match(TokenType.DISTINCT): 5120 args: t.List[t.Optional[exp.Expression]] = [ 5121 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 5122 ] 5123 if self._match(TokenType.COMMA): 5124 args.extend(self._parse_csv(self._parse_conjunction)) 5125 else: 5126 args = self._parse_csv(self._parse_conjunction) # type: ignore 5127 5128 index = self._index 5129 if not self._match(TokenType.R_PAREN) and args: 5130 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5131 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... 
]] [LIMIT n]) 5132 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5133 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5134 5135 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5136 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5137 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5138 if not self._match_text_seq("WITHIN", "GROUP"): 5139 self._retreat(index) 5140 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5141 5142 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5143 order = self._parse_order(this=seq_get(args, 0)) 5144 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5145 5146 def _parse_convert( 5147 self, strict: bool, safe: t.Optional[bool] = None 5148 ) -> t.Optional[exp.Expression]: 5149 this = self._parse_bitwise() 5150 5151 if self._match(TokenType.USING): 5152 to: t.Optional[exp.Expression] = self.expression( 5153 exp.CharacterSet, this=self._parse_var() 5154 ) 5155 elif self._match(TokenType.COMMA): 5156 to = self._parse_types() 5157 else: 5158 to = None 5159 5160 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5161 5162 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5163 """ 5164 There are generally two variants of the DECODE function: 5165 5166 - DECODE(bin, charset) 5167 - DECODE(expression, search, result [, search, result] ... [, default]) 5168 5169 The second variant will always be parsed into a CASE expression. Note that NULL 5170 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5171 instead of relying on pattern matching. 
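For example (illustrative): DECODE(x, 1, 'one', 'other') is parsed as
CASE WHEN x = 1 THEN 'one' ELSE 'other' END, while DECODE(x, NULL, 'none')
becomes CASE WHEN x IS NULL THEN 'none' END.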
5172 """ 5173 args = self._parse_csv(self._parse_conjunction) 5174 5175 if len(args) < 3: 5176 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5177 5178 expression, *expressions = args 5179 if not expression: 5180 return None 5181 5182 ifs = [] 5183 for search, result in zip(expressions[::2], expressions[1::2]): 5184 if not search or not result: 5185 return None 5186 5187 if isinstance(search, exp.Literal): 5188 ifs.append( 5189 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5190 ) 5191 elif isinstance(search, exp.Null): 5192 ifs.append( 5193 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5194 ) 5195 else: 5196 cond = exp.or_( 5197 exp.EQ(this=expression.copy(), expression=search), 5198 exp.and_( 5199 exp.Is(this=expression.copy(), expression=exp.Null()), 5200 exp.Is(this=search.copy(), expression=exp.Null()), 5201 copy=False, 5202 ), 5203 copy=False, 5204 ) 5205 ifs.append(exp.If(this=cond, true=result)) 5206 5207 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5208 5209 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5210 self._match_text_seq("KEY") 5211 key = self._parse_column() 5212 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5213 self._match_text_seq("VALUE") 5214 value = self._parse_bitwise() 5215 5216 if not key and not value: 5217 return None 5218 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5219 5220 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5221 if not this or not self._match_text_seq("FORMAT", "JSON"): 5222 return this 5223 5224 return self.expression(exp.FormatJson, this=this) 5225 5226 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5227 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5228 for value in values: 5229 if self._match_text_seq(value, "ON", on): 5230 return f"{value} ON {on}" 5231 5232 return None 5233 5234 @t.overload 5235 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5236 5237 @t.overload 5238 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5239 5240 def _parse_json_object(self, agg=False): 5241 star = self._parse_star() 5242 expressions = ( 5243 [star] 5244 if star 5245 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5246 ) 5247 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5248 5249 unique_keys = None 5250 if self._match_text_seq("WITH", "UNIQUE"): 5251 unique_keys = True 5252 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5253 unique_keys = False 5254 5255 self._match_text_seq("KEYS") 5256 5257 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5258 self._parse_type() 5259 ) 5260 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5261 5262 return self.expression( 5263 exp.JSONObjectAgg if agg else exp.JSONObject, 5264 expressions=expressions, 5265 null_handling=null_handling, 5266 unique_keys=unique_keys, 5267 return_type=return_type, 5268 encoding=encoding, 5269 ) 5270 5271 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5272 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5273 if not self._match_text_seq("NESTED"): 5274 this = self._parse_id_var() 5275 kind = self._parse_types(allow_identifiers=False) 5276 nested = None 5277 else: 5278 this = None 5279 kind = None 5280 nested = True 5281 5282 path = self._match_text_seq("PATH") and self._parse_string() 5283 nested_schema = nested and self._parse_json_schema() 5284 5285 return self.expression( 5286 exp.JSONColumnDef, 5287 this=this, 5288 kind=kind, 5289 path=path, 5290 nested_schema=nested_schema, 5291 ) 5292 5293 def _parse_json_schema(self) -> exp.JSONSchema: 5294 self._match_text_seq("COLUMNS") 5295 return self.expression( 5296 exp.JSONSchema, 5297 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5298 ) 5299 5300 def _parse_json_table(self) -> exp.JSONTable: 5301 this = self._parse_format_json(self._parse_bitwise()) 5302 path = self._match(TokenType.COMMA) and self._parse_string() 5303 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5304 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5305 schema = self._parse_json_schema() 5306 5307 return exp.JSONTable( 5308 this=this, 5309 schema=schema, 5310 path=path, 5311 error_handling=error_handling, 5312 empty_handling=empty_handling, 5313 ) 5314 5315 def _parse_match_against(self) -> exp.MatchAgainst: 5316 expressions = self._parse_csv(self._parse_column) 5317 5318 self._match_text_seq(")", "AGAINST", "(") 5319 5320 this = self._parse_string() 5321 5322 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5323 modifier = "IN NATURAL LANGUAGE MODE" 5324 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5325 modifier = f"{modifier} WITH QUERY EXPANSION" 5326 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5327 modifier = "IN BOOLEAN MODE" 5328 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5329 modifier = "WITH QUERY EXPANSION" 5330 else: 5331 modifier = None 5332 5333 return self.expression( 5334 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5335 ) 5336 5337 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5338 def _parse_open_json(self) -> exp.OpenJSON: 5339 this = self._parse_bitwise() 5340 path = self._match(TokenType.COMMA) and self._parse_string() 5341 5342 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5343 this = self._parse_field(any_token=True) 5344 kind = self._parse_types() 5345 path = 
self._parse_string() 5346 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5347 5348 return self.expression( 5349 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5350 ) 5351 5352 expressions = None 5353 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5354 self._match_l_paren() 5355 expressions = self._parse_csv(_parse_open_json_column_def) 5356 5357 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5358 5359 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5360 args = self._parse_csv(self._parse_bitwise) 5361 5362 if self._match(TokenType.IN): 5363 return self.expression( 5364 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5365 ) 5366 5367 if haystack_first: 5368 haystack = seq_get(args, 0) 5369 needle = seq_get(args, 1) 5370 else: 5371 needle = seq_get(args, 0) 5372 haystack = seq_get(args, 1) 5373 5374 return self.expression( 5375 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5376 ) 5377 5378 def _parse_predict(self) -> exp.Predict: 5379 self._match_text_seq("MODEL") 5380 this = self._parse_table() 5381 5382 self._match(TokenType.COMMA) 5383 self._match_text_seq("TABLE") 5384 5385 return self.expression( 5386 exp.Predict, 5387 this=this, 5388 expression=self._parse_table(), 5389 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5390 ) 5391 5392 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5393 args = self._parse_csv(self._parse_table) 5394 return exp.JoinHint(this=func_name.upper(), expressions=args) 5395 5396 def _parse_substring(self) -> exp.Substring: 5397 # Postgres supports the form: substring(string [from int] [for int]) 5398 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5399 5400 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5401 5402 if self._match(TokenType.FROM): 5403 args.append(self._parse_bitwise()) 5404 if self._match(TokenType.FOR): 5405 if len(args) == 1: 5406 args.append(exp.Literal.number(1)) 5407 args.append(self._parse_bitwise()) 5408 5409 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5410 5411 def _parse_trim(self) -> exp.Trim: 5412 # https://www.w3resource.com/sql/character-functions/trim.php 5413 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5414 5415 position = None 5416 collation = None 5417 expression = None 5418 5419 if self._match_texts(self.TRIM_TYPES): 5420 position = self._prev.text.upper() 5421 5422 this = self._parse_bitwise() 5423 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5424 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5425 expression = self._parse_bitwise() 5426 5427 if invert_order: 5428 this, expression = expression, this 5429 5430 if self._match(TokenType.COLLATE): 5431 collation = self._parse_bitwise() 5432 5433 return self.expression( 5434 exp.Trim, this=this, position=position, expression=expression, collation=collation 5435 ) 5436 5437 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5438 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5439 5440 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5441 return self._parse_window(self._parse_id_var(), alias=True) 5442 5443 def _parse_respect_or_ignore_nulls( 5444 self, this: t.Optional[exp.Expression] 5445 ) -> t.Optional[exp.Expression]: 5446 if self._match_text_seq("IGNORE", "NULLS"): 
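# e.g. LAST_VALUE(x) IGNORE NULLS: the expression parsed so far is wrapped in
# exp.IgnoreNulls (illustrative comment, not part of the original source).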
5447 return self.expression(exp.IgnoreNulls, this=this) 5448 if self._match_text_seq("RESPECT", "NULLS"): 5449 return self.expression(exp.RespectNulls, this=this) 5450 return this 5451 5452 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5453 if self._match(TokenType.HAVING): 5454 self._match_texts(("MAX", "MIN")) 5455 max = self._prev.text.upper() != "MIN" 5456 return self.expression( 5457 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5458 ) 5459 5460 return this 5461 5462 def _parse_window( 5463 self, this: t.Optional[exp.Expression], alias: bool = False 5464 ) -> t.Optional[exp.Expression]: 5465 func = this 5466 comments = func.comments if isinstance(func, exp.Expression) else None 5467 5468 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5469 self._match(TokenType.WHERE) 5470 this = self.expression( 5471 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5472 ) 5473 self._match_r_paren() 5474 5475 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5476 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5477 if self._match_text_seq("WITHIN", "GROUP"): 5478 order = self._parse_wrapped(self._parse_order) 5479 this = self.expression(exp.WithinGroup, this=this, expression=order) 5480 5481 # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER. 5482 # Some dialects choose to implement it and some do not. 5483 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5484 5485 # There is some code above in _parse_lambda that handles 5486 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5487 5488 # The code below handles 5489 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5490 5491 # Oracle allows both formats 5492 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5493 # and Snowflake chose to do the same for familiarity 5494 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5495 if isinstance(this, exp.AggFunc): 5496 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5497 5498 if ignore_respect and ignore_respect is not this: 5499 ignore_respect.replace(ignore_respect.this) 5500 this = self.expression(ignore_respect.__class__, this=this) 5501 5502 this = self._parse_respect_or_ignore_nulls(this) 5503 5504 # bigquery select from window x AS (partition by ...)
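# Illustrative example (not part of the original source): _parse_named_window calls
# this method with alias=True to handle e.g.
#   SELECT SUM(x) OVER w FROM t WINDOW w AS (PARTITION BY y)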
5505 if alias: 5506 over = None 5507 self._match(TokenType.ALIAS) 5508 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5509 return this 5510 else: 5511 over = self._prev.text.upper() 5512 5513 if comments and isinstance(func, exp.Expression): 5514 func.pop_comments() 5515 5516 if not self._match(TokenType.L_PAREN): 5517 return self.expression( 5518 exp.Window, 5519 comments=comments, 5520 this=this, 5521 alias=self._parse_id_var(False), 5522 over=over, 5523 ) 5524 5525 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5526 5527 first = self._match(TokenType.FIRST) 5528 if self._match_text_seq("LAST"): 5529 first = False 5530 5531 partition, order = self._parse_partition_and_order() 5532 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5533 5534 if kind: 5535 self._match(TokenType.BETWEEN) 5536 start = self._parse_window_spec() 5537 self._match(TokenType.AND) 5538 end = self._parse_window_spec() 5539 5540 spec = self.expression( 5541 exp.WindowSpec, 5542 kind=kind, 5543 start=start["value"], 5544 start_side=start["side"], 5545 end=end["value"], 5546 end_side=end["side"], 5547 ) 5548 else: 5549 spec = None 5550 5551 self._match_r_paren() 5552 5553 window = self.expression( 5554 exp.Window, 5555 comments=comments, 5556 this=this, 5557 partition_by=partition, 5558 order=order, 5559 spec=spec, 5560 alias=window_alias, 5561 over=over, 5562 first=first, 5563 ) 5564 5565 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5566 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5567 return self._parse_window(window, alias=alias) 5568 5569 return window 5570 5571 def _parse_partition_and_order( 5572 self, 5573 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5574 return self._parse_partition_by(), self._parse_order() 5575 5576 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5577 self._match(TokenType.BETWEEN) 5578 5579 return { 5580 "value": ( 5581 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5582 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5583 or self._parse_bitwise() 5584 ), 5585 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5586 } 5587 5588 def _parse_alias( 5589 self, this: t.Optional[exp.Expression], explicit: bool = False 5590 ) -> t.Optional[exp.Expression]: 5591 any_token = self._match(TokenType.ALIAS) 5592 comments = self._prev_comments or [] 5593 5594 if explicit and not any_token: 5595 return this 5596 5597 if self._match(TokenType.L_PAREN): 5598 aliases = self.expression( 5599 exp.Aliases, 5600 comments=comments, 5601 this=this, 5602 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5603 ) 5604 self._match_r_paren(aliases) 5605 return aliases 5606 5607 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5608 self.STRING_ALIASES and self._parse_string_as_identifier() 5609 ) 5610 5611 if alias: 5612 comments.extend(alias.pop_comments()) 5613 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5614 column = this.this 5615 5616 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5617 if not this.comments and column and column.comments: 5618 this.comments = column.pop_comments() 5619 5620 return this 5621 5622 def _parse_id_var( 5623 self, 5624 any_token: bool = True, 5625 tokens: t.Optional[t.Collection[TokenType]] = None, 5626 ) -> t.Optional[exp.Expression]: 5627 expression = self._parse_identifier() 5628 if 
not expression and ( 5629 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5630 ): 5631 quoted = self._prev.token_type == TokenType.STRING 5632 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5633 5634 return expression 5635 5636 def _parse_string(self) -> t.Optional[exp.Expression]: 5637 if self._match_set(self.STRING_PARSERS): 5638 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5639 return self._parse_placeholder() 5640 5641 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5642 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5643 5644 def _parse_number(self) -> t.Optional[exp.Expression]: 5645 if self._match_set(self.NUMERIC_PARSERS): 5646 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5647 return self._parse_placeholder() 5648 5649 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5650 if self._match(TokenType.IDENTIFIER): 5651 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5652 return self._parse_placeholder() 5653 5654 def _parse_var( 5655 self, 5656 any_token: bool = False, 5657 tokens: t.Optional[t.Collection[TokenType]] = None, 5658 upper: bool = False, 5659 ) -> t.Optional[exp.Expression]: 5660 if ( 5661 (any_token and self._advance_any()) 5662 or self._match(TokenType.VAR) 5663 or (self._match_set(tokens) if tokens else False) 5664 ): 5665 return self.expression( 5666 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5667 ) 5668 return self._parse_placeholder() 5669 5670 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5671 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5672 self._advance() 5673 return self._prev 5674 return None 5675 5676 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5677 return self._parse_var() or self._parse_string() 5678 5679 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5680 return self._parse_primary() or self._parse_var(any_token=True) 5681 5682 def _parse_null(self) -> t.Optional[exp.Expression]: 5683 if self._match_set(self.NULL_TOKENS): 5684 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5685 return self._parse_placeholder() 5686 5687 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5688 if self._match(TokenType.TRUE): 5689 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5690 if self._match(TokenType.FALSE): 5691 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5692 return self._parse_placeholder() 5693 5694 def _parse_star(self) -> t.Optional[exp.Expression]: 5695 if self._match(TokenType.STAR): 5696 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5697 return self._parse_placeholder() 5698 5699 def _parse_parameter(self) -> exp.Parameter: 5700 this = self._parse_identifier() or self._parse_primary_or_var() 5701 return self.expression(exp.Parameter, this=this) 5702 5703 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5704 if self._match_set(self.PLACEHOLDER_PARSERS): 5705 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5706 if placeholder: 5707 return placeholder 5708 self._advance(-1) 5709 return None 5710 5711 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 5712 if not self._match_texts(keywords): 5713 return None 5714 if self._match(TokenType.L_PAREN, advance=False): 5715 return 
self._parse_wrapped_csv(self._parse_expression) 5716 5717 expression = self._parse_expression() 5718 return [expression] if expression else None 5719 5720 def _parse_csv( 5721 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5722 ) -> t.List[exp.Expression]: 5723 parse_result = parse_method() 5724 items = [parse_result] if parse_result is not None else [] 5725 5726 while self._match(sep): 5727 self._add_comments(parse_result) 5728 parse_result = parse_method() 5729 if parse_result is not None: 5730 items.append(parse_result) 5731 5732 return items 5733 5734 def _parse_tokens( 5735 self, parse_method: t.Callable, expressions: t.Dict 5736 ) -> t.Optional[exp.Expression]: 5737 this = parse_method() 5738 5739 while self._match_set(expressions): 5740 this = self.expression( 5741 expressions[self._prev.token_type], 5742 this=this, 5743 comments=self._prev_comments, 5744 expression=parse_method(), 5745 ) 5746 5747 return this 5748 5749 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5750 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5751 5752 def _parse_wrapped_csv( 5753 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5754 ) -> t.List[exp.Expression]: 5755 return self._parse_wrapped( 5756 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5757 ) 5758 5759 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5760 wrapped = self._match(TokenType.L_PAREN) 5761 if not wrapped and not optional: 5762 self.raise_error("Expecting (") 5763 parse_result = parse_method() 5764 if wrapped: 5765 self._match_r_paren() 5766 return parse_result 5767 5768 def _parse_expressions(self) -> t.List[exp.Expression]: 5769 return self._parse_csv(self._parse_expression) 5770 5771 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5772 return self._parse_select() or self._parse_set_operations( 5773 self._parse_expression() if alias else self._parse_conjunction() 5774 ) 5775 5776 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5777 return self._parse_query_modifiers( 5778 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5779 ) 5780 5781 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5782 this = None 5783 if self._match_texts(self.TRANSACTION_KIND): 5784 this = self._prev.text 5785 5786 self._match_texts(("TRANSACTION", "WORK")) 5787 5788 modes = [] 5789 while True: 5790 mode = [] 5791 while self._match(TokenType.VAR): 5792 mode.append(self._prev.text) 5793 5794 if mode: 5795 modes.append(" ".join(mode)) 5796 if not self._match(TokenType.COMMA): 5797 break 5798 5799 return self.expression(exp.Transaction, this=this, modes=modes) 5800 5801 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5802 chain = None 5803 savepoint = None 5804 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5805 5806 self._match_texts(("TRANSACTION", "WORK")) 5807 5808 if self._match_text_seq("TO"): 5809 self._match_text_seq("SAVEPOINT") 5810 savepoint = self._parse_id_var() 5811 5812 if self._match(TokenType.AND): 5813 chain = not self._match_text_seq("NO") 5814 self._match_text_seq("CHAIN") 5815 5816 if is_rollback: 5817 return self.expression(exp.Rollback, savepoint=savepoint) 5818 5819 return self.expression(exp.Commit, chain=chain) 5820 5821 def _parse_refresh(self) -> exp.Refresh: 5822 self._match(TokenType.TABLE) 5823 return self.expression(exp.Refresh, 
this=self._parse_string() or self._parse_table()) 5824 5825 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5826 if not self._match_text_seq("ADD"): 5827 return None 5828 5829 self._match(TokenType.COLUMN) 5830 exists_column = self._parse_exists(not_=True) 5831 expression = self._parse_field_def() 5832 5833 if expression: 5834 expression.set("exists", exists_column) 5835 5836 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5837 if self._match_texts(("FIRST", "AFTER")): 5838 position = self._prev.text 5839 column_position = self.expression( 5840 exp.ColumnPosition, this=self._parse_column(), position=position 5841 ) 5842 expression.set("position", column_position) 5843 5844 return expression 5845 5846 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5847 drop = self._match(TokenType.DROP) and self._parse_drop() 5848 if drop and not isinstance(drop, exp.Command): 5849 drop.set("kind", drop.args.get("kind", "COLUMN")) 5850 return drop 5851 5852 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5853 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5854 return self.expression( 5855 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5856 ) 5857 5858 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5859 index = self._index - 1 5860 5861 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5862 return self._parse_csv( 5863 lambda: self.expression( 5864 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5865 ) 5866 ) 5867 5868 self._retreat(index) 5869 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5870 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5871 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5872 5873 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 5874 if self._match_texts(self.ALTER_ALTER_PARSERS): 5875 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 5876 5877 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 5878 # keyword after ALTER we default to parsing this statement 5879 self._match(TokenType.COLUMN) 5880 column = self._parse_field(any_token=True) 5881 5882 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5883 return self.expression(exp.AlterColumn, this=column, drop=True) 5884 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5885 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5886 if self._match(TokenType.COMMENT): 5887 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5888 5889 self._match_text_seq("SET", "DATA") 5890 self._match_text_seq("TYPE") 5891 return self.expression( 5892 exp.AlterColumn, 5893 this=column, 5894 dtype=self._parse_types(), 5895 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5896 using=self._match(TokenType.USING) and self._parse_conjunction(), 5897 ) 5898 5899 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 5900 if self._match_texts(("ALL", "EVEN", "AUTO")): 5901 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 5902 5903 self._match_text_seq("KEY", "DISTKEY") 5904 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 5905 5906 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 5907 if compound: 5908 
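# Redshift's compound form, e.g. ALTER TABLE t ALTER COMPOUND SORTKEY (c1, c2):
# consume the SORTKEY keyword that follows COMPOUND (illustrative comment, not
# part of the original source).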
self._match_text_seq("SORTKEY") 5909 5910 if self._match(TokenType.L_PAREN, advance=False): 5911 return self.expression( 5912 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 5913 ) 5914 5915 self._match_texts(("AUTO", "NONE")) 5916 return self.expression( 5917 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 5918 ) 5919 5920 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5921 index = self._index - 1 5922 5923 partition_exists = self._parse_exists() 5924 if self._match(TokenType.PARTITION, advance=False): 5925 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5926 5927 self._retreat(index) 5928 return self._parse_csv(self._parse_drop_column) 5929 5930 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5931 if self._match(TokenType.COLUMN): 5932 exists = self._parse_exists() 5933 old_column = self._parse_column() 5934 to = self._match_text_seq("TO") 5935 new_column = self._parse_column() 5936 5937 if old_column is None or to is None or new_column is None: 5938 return None 5939 5940 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5941 5942 self._match_text_seq("TO") 5943 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5944 5945 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5946 start = self._prev 5947 5948 if not self._match(TokenType.TABLE): 5949 return self._parse_as_command(start) 5950 5951 exists = self._parse_exists() 5952 only = self._match_text_seq("ONLY") 5953 this = self._parse_table(schema=True) 5954 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 5955 5956 if self._next: 5957 self._advance() 5958 5959 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5960 if parser: 5961 actions = ensure_list(parser(self)) 5962 options = self._parse_csv(self._parse_property) 5963 5964 if not self._curr and actions: 5965 return self.expression( 5966 exp.AlterTable, 5967 this=this, 5968 exists=exists, 5969 actions=actions, 5970 only=only, 5971 options=options, 5972 cluster=cluster, 5973 ) 5974 5975 return self._parse_as_command(start) 5976 5977 def _parse_merge(self) -> exp.Merge: 5978 self._match(TokenType.INTO) 5979 target = self._parse_table() 5980 5981 if target and self._match(TokenType.ALIAS, advance=False): 5982 target.set("alias", self._parse_table_alias()) 5983 5984 self._match(TokenType.USING) 5985 using = self._parse_table() 5986 5987 self._match(TokenType.ON) 5988 on = self._parse_conjunction() 5989 5990 return self.expression( 5991 exp.Merge, 5992 this=target, 5993 using=using, 5994 on=on, 5995 expressions=self._parse_when_matched(), 5996 ) 5997 5998 def _parse_when_matched(self) -> t.List[exp.When]: 5999 whens = [] 6000 6001 while self._match(TokenType.WHEN): 6002 matched = not self._match(TokenType.NOT) 6003 self._match_text_seq("MATCHED") 6004 source = ( 6005 False 6006 if self._match_text_seq("BY", "TARGET") 6007 else self._match_text_seq("BY", "SOURCE") 6008 ) 6009 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 6010 6011 self._match(TokenType.THEN) 6012 6013 if self._match(TokenType.INSERT): 6014 _this = self._parse_star() 6015 if _this: 6016 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6017 else: 6018 then = self.expression( 6019 exp.Insert, 6020 this=self._parse_value(), 6021 expression=self._match_text_seq("VALUES") and self._parse_value(), 6022 ) 
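# Illustrative example (not part of the original source): the branch below parses
# WHEN MATCHED THEN UPDATE SET a = b, c = d into exp.Update(expressions=[a = b, c = d]).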
6023 elif self._match(TokenType.UPDATE): 6024 expressions = self._parse_star() 6025 if expressions: 6026 then = self.expression(exp.Update, expressions=expressions) 6027 else: 6028 then = self.expression( 6029 exp.Update, 6030 expressions=self._match(TokenType.SET) 6031 and self._parse_csv(self._parse_equality), 6032 ) 6033 elif self._match(TokenType.DELETE): 6034 then = self.expression(exp.Var, this=self._prev.text) 6035 else: 6036 then = None 6037 6038 whens.append( 6039 self.expression( 6040 exp.When, 6041 matched=matched, 6042 source=source, 6043 condition=condition, 6044 then=then, 6045 ) 6046 ) 6047 return whens 6048 6049 def _parse_show(self) -> t.Optional[exp.Expression]: 6050 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6051 if parser: 6052 return parser(self) 6053 return self._parse_as_command(self._prev) 6054 6055 def _parse_set_item_assignment( 6056 self, kind: t.Optional[str] = None 6057 ) -> t.Optional[exp.Expression]: 6058 index = self._index 6059 6060 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6061 return self._parse_set_transaction(global_=kind == "GLOBAL") 6062 6063 left = self._parse_primary() or self._parse_column() 6064 assignment_delimiter = self._match_texts(("=", "TO")) 6065 6066 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6067 self._retreat(index) 6068 return None 6069 6070 right = self._parse_statement() or self._parse_id_var() 6071 this = self.expression(exp.EQ, this=left, expression=right) 6072 6073 return self.expression(exp.SetItem, this=this, kind=kind) 6074 6075 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6076 self._match_text_seq("TRANSACTION") 6077 characteristics = self._parse_csv( 6078 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6079 ) 6080 return self.expression( 6081 exp.SetItem, 6082 expressions=characteristics, 6083 kind="TRANSACTION", 6084 **{"global": global_}, # type: ignore 6085 ) 6086 6087 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6088 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6089 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6090 6091 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6092 index = self._index 6093 set_ = self.expression( 6094 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6095 ) 6096 6097 if self._curr: 6098 self._retreat(index) 6099 return self._parse_as_command(self._prev) 6100 6101 return set_ 6102 6103 def _parse_var_from_options( 6104 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6105 ) -> t.Optional[exp.Var]: 6106 start = self._curr 6107 if not start: 6108 return None 6109 6110 option = start.text.upper() 6111 continuations = options.get(option) 6112 6113 index = self._index 6114 self._advance() 6115 for keywords in continuations or []: 6116 if isinstance(keywords, str): 6117 keywords = (keywords,) 6118 6119 if self._match_text_seq(*keywords): 6120 option = f"{option} {' '.join(keywords)}" 6121 break 6122 else: 6123 if continuations or continuations is None: 6124 if raise_unmatched: 6125 self.raise_error(f"Unknown option {option}") 6126 6127 self._retreat(index) 6128 return None 6129 6130 return exp.var(option) 6131 6132 def _parse_as_command(self, start: Token) -> exp.Command: 6133 while self._curr: 6134 self._advance() 6135 text = self._find_sql(start, self._prev) 6136 size = len(start.text) 6137 self._warn_unsupported() 6138 
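# Descriptive note (not part of the original source): the unsupported statement is
# preserved verbatim, with `this` holding the leading keyword and `expression` the rest.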
return exp.Command(this=text[:size], expression=text[size:]) 6139 6140 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6141 settings = [] 6142 6143 self._match_l_paren() 6144 kind = self._parse_id_var() 6145 6146 if self._match(TokenType.L_PAREN): 6147 while True: 6148 key = self._parse_id_var() 6149 value = self._parse_primary() 6150 6151 if not key and value is None: 6152 break 6153 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6154 self._match(TokenType.R_PAREN) 6155 6156 self._match_r_paren() 6157 6158 return self.expression( 6159 exp.DictProperty, 6160 this=this, 6161 kind=kind.this if kind else None, 6162 settings=settings, 6163 ) 6164 6165 def _parse_dict_range(self, this: str) -> exp.DictRange: 6166 self._match_l_paren() 6167 has_min = self._match_text_seq("MIN") 6168 if has_min: 6169 min = self._parse_var() or self._parse_primary() 6170 self._match_text_seq("MAX") 6171 max = self._parse_var() or self._parse_primary() 6172 else: 6173 max = self._parse_var() or self._parse_primary() 6174 min = exp.Literal.number(0) 6175 self._match_r_paren() 6176 return self.expression(exp.DictRange, this=this, min=min, max=max) 6177 6178 def _parse_comprehension( 6179 self, this: t.Optional[exp.Expression] 6180 ) -> t.Optional[exp.Comprehension]: 6181 index = self._index 6182 expression = self._parse_column() 6183 if not self._match(TokenType.IN): 6184 self._retreat(index - 1) 6185 return None 6186 iterator = self._parse_column() 6187 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6188 return self.expression( 6189 exp.Comprehension, 6190 this=this, 6191 expression=expression, 6192 iterator=iterator, 6193 condition=condition, 6194 ) 6195 6196 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6197 if self._match(TokenType.HEREDOC_STRING): 6198 return self.expression(exp.Heredoc, this=self._prev.text) 6199 6200 if not self._match_text_seq("$"): 6201 return None 6202 6203 tags = ["$"] 6204 tag_text = None 6205 6206 if self._is_connected(): 6207 self._advance() 6208 tags.append(self._prev.text.upper()) 6209 else: 6210 self.raise_error("No closing $ found") 6211 6212 if tags[-1] != "$": 6213 if self._is_connected() and self._match_text_seq("$"): 6214 tag_text = tags[-1] 6215 tags.append("$") 6216 else: 6217 self.raise_error("No closing $ found") 6218 6219 heredoc_start = self._curr 6220 6221 while self._curr: 6222 if self._match_text_seq(*tags, advance=False): 6223 this = self._find_sql(heredoc_start, self._prev) 6224 self._advance(len(tags)) 6225 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6226 6227 self._advance() 6228 6229 self.raise_error(f"No closing {''.join(tags)} found") 6230 return None 6231 6232 def _find_parser( 6233 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6234 ) -> t.Optional[t.Callable]: 6235 if not self._curr: 6236 return None 6237 6238 index = self._index 6239 this = [] 6240 while True: 6241 # The current token might be multiple words 6242 curr = self._curr.text.upper() 6243 key = curr.split(" ") 6244 this.append(curr) 6245 6246 self._advance() 6247 result, trie = in_trie(trie, key) 6248 if result == TrieResult.FAILED: 6249 break 6250 6251 if result == TrieResult.EXISTS: 6252 subparser = parsers[" ".join(this)] 6253 return subparser 6254 6255 self._retreat(index) 6256 return None 6257 6258 def _match(self, token_type, advance=True, expression=None): 6259 if not self._curr: 6260 return None 6261 6262 if self._curr.token_type == token_type: 6263 if advance: 6264 self._advance() 6265 
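# Descriptive note (not part of the original source): attach any pending comments
# from the consumed token to the given expression.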
self._add_comments(expression) 6266 return True 6267 6268 return None 6269 6270 def _match_set(self, types, advance=True): 6271 if not self._curr: 6272 return None 6273 6274 if self._curr.token_type in types: 6275 if advance: 6276 self._advance() 6277 return True 6278 6279 return None 6280 6281 def _match_pair(self, token_type_a, token_type_b, advance=True): 6282 if not self._curr or not self._next: 6283 return None 6284 6285 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6286 if advance: 6287 self._advance(2) 6288 return True 6289 6290 return None 6291 6292 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6293 if not self._match(TokenType.L_PAREN, expression=expression): 6294 self.raise_error("Expecting (") 6295 6296 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6297 if not self._match(TokenType.R_PAREN, expression=expression): 6298 self.raise_error("Expecting )") 6299 6300 def _match_texts(self, texts, advance=True): 6301 if self._curr and self._curr.text.upper() in texts: 6302 if advance: 6303 self._advance() 6304 return True 6305 return None 6306 6307 def _match_text_seq(self, *texts, advance=True): 6308 index = self._index 6309 for text in texts: 6310 if self._curr and self._curr.text.upper() == text: 6311 self._advance() 6312 else: 6313 self._retreat(index) 6314 return None 6315 6316 if not advance: 6317 self._retreat(index) 6318 6319 return True 6320 6321 def _replace_lambda( 6322 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6323 ) -> t.Optional[exp.Expression]: 6324 if not node: 6325 return node 6326 6327 for column in node.find_all(exp.Column): 6328 if column.parts[0].name in lambda_variables: 6329 dot_or_id = column.to_dot() if column.table else column.this 6330 parent = column.parent 6331 6332 while isinstance(parent, exp.Dot): 6333 if not isinstance(parent.parent, exp.Dot): 6334 parent.replace(dot_or_id) 6335 break 6336 parent = parent.parent 6337 else: 6338 if column is node: 6339 node = dot_or_id 6340 else: 6341 column.replace(dot_or_id) 6342 return node 6343 6344 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6345 start = self._prev 6346 6347 # Not to be confused with TRUNCATE(number, decimals) function call 6348 if self._match(TokenType.L_PAREN): 6349 self._retreat(self._index - 2) 6350 return self._parse_function() 6351 6352 # Clickhouse supports TRUNCATE DATABASE as well 6353 is_database = self._match(TokenType.DATABASE) 6354 6355 self._match(TokenType.TABLE) 6356 6357 exists = self._parse_exists(not_=False) 6358 6359 expressions = self._parse_csv( 6360 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6361 ) 6362 6363 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6364 6365 if self._match_text_seq("RESTART", "IDENTITY"): 6366 identity = "RESTART" 6367 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6368 identity = "CONTINUE" 6369 else: 6370 identity = None 6371 6372 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6373 option = self._prev.text 6374 else: 6375 option = None 6376 6377 partition = self._parse_partition() 6378 6379 # Fallback case 6380 if self._curr: 6381 return self._parse_as_command(start) 6382 6383 return self.expression( 6384 exp.TruncateTable, 6385 expressions=expressions, 6386 is_database=is_database, 6387 exists=exists, 6388 cluster=cluster, 6389 identity=identity, 6390 option=option, 6391 partition=partition, 6392 ) 
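# A minimal usage sketch (assumes the public sqlglot API; not part of the original source):
#
#   import sqlglot
#   stmt = sqlglot.parse_one("TRUNCATE TABLE t1, t2 RESTART IDENTITY CASCADE")
#   assert isinstance(stmt, sqlglot.exp.TruncateTable)  # identity="RESTART", option="CASCADE"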
6393 6394 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6395 this = self._parse_ordered(self._parse_opclass) 6396 6397 if not self._match(TokenType.WITH): 6398 return this 6399 6400 op = self._parse_var(any_token=True) 6401 6402 return self.expression(exp.WithOperator, this=this, op=op) 6403 6404 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6405 opts = [] 6406 self._match(TokenType.EQ) 6407 self._match(TokenType.L_PAREN) 6408 while self._curr and not self._match(TokenType.R_PAREN): 6409 opts.append(self._parse_conjunction()) 6410 self._match(TokenType.COMMA) 6411 return opts 6412 6413 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6414 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 6415 6416 options = [] 6417 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6418 option = self._parse_unquoted_field() 6419 value = None 6420 6421 # Some options are defined as functions with the values as params 6422 if not isinstance(option, exp.Func): 6423 prev = self._prev.text.upper() 6424 # Different dialects might separate options and values by white space, "=" and "AS" 6425 self._match(TokenType.EQ) 6426 self._match(TokenType.ALIAS) 6427 6428 if prev == "FILE_FORMAT" and self._match(TokenType.L_PAREN): 6429 # Snowflake FILE_FORMAT case 6430 value = self._parse_wrapped_options() 6431 else: 6432 value = self._parse_unquoted_field() 6433 6434 param = self.expression(exp.CopyParameter, this=option, expression=value) 6435 options.append(param) 6436 6437 if sep: 6438 self._match(sep) 6439 6440 return options 6441 6442 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6443 expr = self.expression(exp.Credentials) 6444 6445 if self._match_text_seq("STORAGE_INTEGRATION", advance=False): 6446 expr.set("storage", self._parse_conjunction()) 6447 if self._match_text_seq("CREDENTIALS"): 6448 # Snowflake supports CREDENTIALS = (...), while Redshift CREDENTIALS <string> 6449 creds = ( 6450 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6451 ) 6452 expr.set("credentials", creds) 6453 if self._match_text_seq("ENCRYPTION"): 6454 expr.set("encryption", self._parse_wrapped_options()) 6455 if self._match_text_seq("IAM_ROLE"): 6456 expr.set("iam_role", self._parse_field()) 6457 if self._match_text_seq("REGION"): 6458 expr.set("region", self._parse_field()) 6459 6460 return expr 6461 6462 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6463 return self._parse_field() 6464 6465 def _parse_copy(self) -> exp.Copy | exp.Command: 6466 start = self._prev 6467 6468 self._match(TokenType.INTO) 6469 6470 this = ( 6471 self._parse_conjunction() 6472 if self._match(TokenType.L_PAREN, advance=False) 6473 else self._parse_table(schema=True) 6474 ) 6475 6476 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6477 6478 files = self._parse_csv(self._parse_file_location) 6479 credentials = self._parse_credentials() 6480 6481 self._match_text_seq("WITH") 6482 6483 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6484 6485 # Fallback case 6486 if self._curr: 6487 return self._parse_as_command(start) 6488 6489 return self.expression( 6490 exp.Copy, 6491 this=this, 6492 kind=kind, 6493 credentials=credentials, 6494 files=files, 6495 params=params, 6496 )
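# A minimal usage sketch (assumes the public sqlglot API; not part of the original source):
#
#   import sqlglot
#   stmt = sqlglot.parse_one("COPY INTO t FROM 'data.csv'")
#   assert isinstance(stmt, sqlglot.exp.Copy)  # kind=True indicates the FROM (load) direction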
self._parse_freespace(), 807 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 808 "HEAP": lambda self: self.expression(exp.HeapProperty), 809 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 810 "IMMUTABLE": lambda self: self.expression( 811 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 812 ), 813 "INHERITS": lambda self: self.expression( 814 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 815 ), 816 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 817 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 818 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 819 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 820 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 821 "LIKE": lambda self: self._parse_create_like(), 822 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 823 "LOCK": lambda self: self._parse_locking(), 824 "LOCKING": lambda self: self._parse_locking(), 825 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 826 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 827 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 828 "MODIFIES": lambda self: self._parse_modifies_property(), 829 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 830 "NO": lambda self: self._parse_no_property(), 831 "ON": lambda self: self._parse_on_property(), 832 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 833 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 834 "PARTITION": lambda self: self._parse_partitioned_of(), 835 "PARTITION BY": lambda self: self._parse_partitioned_by(), 836 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 837 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 838 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 839 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 840 "READS": lambda self: self._parse_reads_property(), 841 "REMOTE": lambda self: self._parse_remote_with_connection(), 842 "RETURNS": lambda self: self._parse_returns(), 843 "ROW": lambda self: self._parse_row(), 844 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 845 "SAMPLE": lambda self: self.expression( 846 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 847 ), 848 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 849 "SETTINGS": lambda self: self.expression( 850 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 851 ), 852 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 853 "SORTKEY": lambda self: self._parse_sortkey(), 854 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 855 "STABLE": lambda self: self.expression( 856 exp.StabilityProperty, this=exp.Literal.string("STABLE") 857 ), 858 "STORED": lambda self: self._parse_stored(), 859 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 860 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 861 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 862 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 863 "TO": lambda self: self._parse_to_table(), 864 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 865 "TRANSFORM": lambda self: self.expression( 866 
exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 867 ), 868 "TTL": lambda self: self._parse_ttl(), 869 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 870 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 871 "VOLATILE": lambda self: self._parse_volatile_property(), 872 "WITH": lambda self: self._parse_with_property(), 873 } 874 875 CONSTRAINT_PARSERS = { 876 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 877 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 878 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 879 "CHARACTER SET": lambda self: self.expression( 880 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 881 ), 882 "CHECK": lambda self: self.expression( 883 exp.CheckColumnConstraint, 884 this=self._parse_wrapped(self._parse_conjunction), 885 enforced=self._match_text_seq("ENFORCED"), 886 ), 887 "COLLATE": lambda self: self.expression( 888 exp.CollateColumnConstraint, this=self._parse_var() 889 ), 890 "COMMENT": lambda self: self.expression( 891 exp.CommentColumnConstraint, this=self._parse_string() 892 ), 893 "COMPRESS": lambda self: self._parse_compress(), 894 "CLUSTERED": lambda self: self.expression( 895 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 896 ), 897 "NONCLUSTERED": lambda self: self.expression( 898 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 899 ), 900 "DEFAULT": lambda self: self.expression( 901 exp.DefaultColumnConstraint, this=self._parse_bitwise() 902 ), 903 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 904 "EPHEMERAL": lambda self: self.expression( 905 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 906 ), 907 "EXCLUDE": lambda self: self.expression( 908 exp.ExcludeColumnConstraint, this=self._parse_index_params() 909 ), 910 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 911 "FORMAT": lambda self: self.expression( 912 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 913 ), 914 "GENERATED": lambda self: self._parse_generated_as_identity(), 915 "IDENTITY": lambda self: self._parse_auto_increment(), 916 "INLINE": lambda self: self._parse_inline(), 917 "LIKE": lambda self: self._parse_create_like(), 918 "NOT": lambda self: self._parse_not_constraint(), 919 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 920 "ON": lambda self: ( 921 self._match(TokenType.UPDATE) 922 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 923 ) 924 or self.expression(exp.OnProperty, this=self._parse_id_var()), 925 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 926 "PERIOD": lambda self: self._parse_period_for_system_time(), 927 "PRIMARY KEY": lambda self: self._parse_primary_key(), 928 "REFERENCES": lambda self: self._parse_references(match=False), 929 "TITLE": lambda self: self.expression( 930 exp.TitleColumnConstraint, this=self._parse_var_or_string() 931 ), 932 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 933 "UNIQUE": lambda self: self._parse_unique(), 934 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 935 "WITH": lambda self: self.expression( 936 exp.Properties, expressions=self._parse_wrapped_properties() 937 ), 938 } 939 940 ALTER_PARSERS = { 941 "ADD": lambda self: 
self._parse_alter_table_add(), 942 "ALTER": lambda self: self._parse_alter_table_alter(), 943 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 944 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 945 "DROP": lambda self: self._parse_alter_table_drop(), 946 "RENAME": lambda self: self._parse_alter_table_rename(), 947 } 948 949 ALTER_ALTER_PARSERS = { 950 "DISTKEY": lambda self: self._parse_alter_diststyle(), 951 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 952 "SORTKEY": lambda self: self._parse_alter_sortkey(), 953 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 954 } 955 956 SCHEMA_UNNAMED_CONSTRAINTS = { 957 "CHECK", 958 "EXCLUDE", 959 "FOREIGN KEY", 960 "LIKE", 961 "PERIOD", 962 "PRIMARY KEY", 963 "UNIQUE", 964 } 965 966 NO_PAREN_FUNCTION_PARSERS = { 967 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 968 "CASE": lambda self: self._parse_case(), 969 "IF": lambda self: self._parse_if(), 970 "NEXT": lambda self: self._parse_next_value_for(), 971 } 972 973 INVALID_FUNC_NAME_TOKENS = { 974 TokenType.IDENTIFIER, 975 TokenType.STRING, 976 } 977 978 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 979 980 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 981 982 FUNCTION_PARSERS = { 983 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 984 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 985 "DECODE": lambda self: self._parse_decode(), 986 "EXTRACT": lambda self: self._parse_extract(), 987 "JSON_OBJECT": lambda self: self._parse_json_object(), 988 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 989 "JSON_TABLE": lambda self: self._parse_json_table(), 990 "MATCH": lambda self: self._parse_match_against(), 991 "OPENJSON": lambda self: self._parse_open_json(), 992 "POSITION": lambda self: self._parse_position(), 993 "PREDICT": lambda self: self._parse_predict(), 994 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 995 "STRING_AGG": lambda self: self._parse_string_agg(), 996 "SUBSTRING": lambda self: self._parse_substring(), 997 "TRIM": lambda self: self._parse_trim(), 998 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 999 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1000 } 1001 1002 QUERY_MODIFIER_PARSERS = { 1003 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1004 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1005 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1006 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1007 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1008 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1009 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1010 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1011 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1012 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1013 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1014 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1015 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1016 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1017 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1018 TokenType.CLUSTER_BY: lambda self: ( 1019 "cluster", 1020 self._parse_sort(exp.Cluster, 
TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTER: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"), tuple())

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}
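    # The OPTIONS_TYPE tables above (TRANSACTION_CHARACTERISTICS, CONFLICT_ACTIONS,
    # CREATE_SEQUENCE, ...) map a leading keyword to its allowed continuations and
    # are consumed by _parse_var_from_options. Illustrative sketch (not part of the
    # original source): CONFLICT_ACTIONS maps "DO" to ("NOTHING", "UPDATE"), which
    # is what allows e.g.
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one(
    #   ...     "INSERT INTO t (a) VALUES (1) ON CONFLICT DO NOTHING", dialect="postgres"
    #   ... )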
    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_UNION = True
    UNION_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
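    # Illustrative usage sketch (not part of the original source; assumes the
    # default dialect): tokenize with this module's Tokenizer, then hand the raw
    # tokens to parse(), which returns one syntax tree per statement.
    #
    #   >>> from sqlglot.tokens import Tokenizer
    #   >>> sql = "SELECT a FROM t; SELECT b FROM u"
    #   >>> trees = Parser(error_level=ErrorLevel.RAISE).parse(Tokenizer().tokenize(sql), sql)
    #   >>> [tree.sql() for tree in trees]
    #   ['SELECT a FROM t', 'SELECT b FROM u']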
1232 """ 1233 return self._parse( 1234 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1235 ) 1236 1237 def parse_into( 1238 self, 1239 expression_types: exp.IntoType, 1240 raw_tokens: t.List[Token], 1241 sql: t.Optional[str] = None, 1242 ) -> t.List[t.Optional[exp.Expression]]: 1243 """ 1244 Parses a list of tokens into a given Expression type. If a collection of Expression 1245 types is given instead, this method will try to parse the token list into each one 1246 of them, stopping at the first for which the parsing succeeds. 1247 1248 Args: 1249 expression_types: The expression type(s) to try and parse the token list into. 1250 raw_tokens: The list of tokens. 1251 sql: The original SQL string, used to produce helpful debug messages. 1252 1253 Returns: 1254 The target Expression. 1255 """ 1256 errors = [] 1257 for expression_type in ensure_list(expression_types): 1258 parser = self.EXPRESSION_PARSERS.get(expression_type) 1259 if not parser: 1260 raise TypeError(f"No parser registered for {expression_type}") 1261 1262 try: 1263 return self._parse(parser, raw_tokens, sql) 1264 except ParseError as e: 1265 e.errors[0]["into_expression"] = expression_type 1266 errors.append(e) 1267 1268 raise ParseError( 1269 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1270 errors=merge_errors(errors), 1271 ) from errors[-1] 1272 1273 def _parse( 1274 self, 1275 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1276 raw_tokens: t.List[Token], 1277 sql: t.Optional[str] = None, 1278 ) -> t.List[t.Optional[exp.Expression]]: 1279 self.reset() 1280 self.sql = sql or "" 1281 1282 total = len(raw_tokens) 1283 chunks: t.List[t.List[Token]] = [[]] 1284 1285 for i, token in enumerate(raw_tokens): 1286 if token.token_type == TokenType.SEMICOLON: 1287 if token.comments: 1288 chunks.append([token]) 1289 1290 if i < total - 1: 1291 chunks.append([]) 1292 else: 1293 chunks[-1].append(token) 1294 1295 expressions = [] 1296 1297 for tokens in chunks: 1298 self._index = -1 1299 self._tokens = tokens 1300 self._advance() 1301 1302 expressions.append(parse_method(self)) 1303 1304 if self._index < len(self._tokens): 1305 self.raise_error("Invalid expression / Unexpected token") 1306 1307 self.check_errors() 1308 1309 return expressions 1310 1311 def check_errors(self) -> None: 1312 """Logs or raises any found errors, depending on the chosen error level setting.""" 1313 if self.error_level == ErrorLevel.WARN: 1314 for error in self.errors: 1315 logger.error(str(error)) 1316 elif self.error_level == ErrorLevel.RAISE and self.errors: 1317 raise ParseError( 1318 concat_messages(self.errors, self.max_errors), 1319 errors=merge_errors(self.errors), 1320 ) 1321 1322 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1323 """ 1324 Appends an error in the list of recorded errors or raises it, depending on the chosen 1325 error level setting. 1326 """ 1327 token = token or self._curr or self._prev or Token.string("") 1328 start = token.start 1329 end = token.end + 1 1330 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1331 highlight = self.sql[start:end] 1332 end_context = self.sql[end : end + self.error_message_context] 1333 1334 error = ParseError.new( 1335 f"{message}. 
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)
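    # Note: _advance/_retreat form the parser's backtracking primitive. Speculative
    # parsing throughout this class follows the same pattern (sketch only, not part
    # of the original source):
    #
    #   index = self._index           # remember the token cursor
    #   node = self._parse_string()   # try an optional production
    #   if not node:
    #       self._retreat(index)      # rewind on failure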
    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command, this=self._prev.text.upper(), expression=self._parse_string()
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an
        error. This behavior can differ depending on the user-set ErrorLevel, so _try_parse aims
        to solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
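    # Note: _parse_statement dispatches on STATEMENT_PARSERS first, then on the
    # dialect tokenizer's COMMANDS, and otherwise falls through to parsing a plain
    # expression or SELECT. Illustrative sketch (default dialect; exact behavior is
    # dialect-dependent) — unsupported statements degrade to exp.Command:
    #
    #   >>> import sqlglot
    #   >>> type(sqlglot.parse_one("SHOW TABLES")).__name__
    #   'Command'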
    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            return self._parse_as_command(start)

        if_exists = exists or self._parse_exists()
        table = self._parse_table_parts(
            schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=if_exists,
            this=table,
            expressions=expressions,
            kind=kind.upper(),
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
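    # Illustrative sketch (not part of the original source): _parse_exists consumes
    # an optional IF [NOT] EXISTS guard, which _parse_drop/_parse_create record on
    # the resulting expression.
    #
    #   >>> import sqlglot
    #   >>> sqlglot.parse_one("DROP TABLE IF EXISTS t").args["exists"]
    #   True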
    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        comments = self._prev_comments

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )

        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property:
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_statement()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                extend_props(self._parse_properties())
            else:
                expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        return self.expression(
            exp.Create,
            comments=comments,
            this=this,
            kind=create_token.text.upper(),
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif
self._match_text_seq("CACHE"): 1756 # T-SQL allows empty CACHE which is initialized dynamically 1757 seq.set("cache", self._parse_number() or True) 1758 elif self._match_text_seq("OWNED", "BY"): 1759 # "OWNED BY NONE" is the default 1760 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1761 else: 1762 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1763 if opt: 1764 options.append(opt) 1765 else: 1766 break 1767 1768 seq.set("options", options if options else None) 1769 return None if self._index == index else seq 1770 1771 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1772 # only used for teradata currently 1773 self._match(TokenType.COMMA) 1774 1775 kwargs = { 1776 "no": self._match_text_seq("NO"), 1777 "dual": self._match_text_seq("DUAL"), 1778 "before": self._match_text_seq("BEFORE"), 1779 "default": self._match_text_seq("DEFAULT"), 1780 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1781 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1782 "after": self._match_text_seq("AFTER"), 1783 "minimum": self._match_texts(("MIN", "MINIMUM")), 1784 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1785 } 1786 1787 if self._match_texts(self.PROPERTY_PARSERS): 1788 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1789 try: 1790 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1791 except TypeError: 1792 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1793 1794 return None 1795 1796 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1797 return self._parse_wrapped_csv(self._parse_property) 1798 1799 def _parse_property(self) -> t.Optional[exp.Expression]: 1800 if self._match_texts(self.PROPERTY_PARSERS): 1801 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1802 1803 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1804 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1805 1806 if self._match_text_seq("COMPOUND", "SORTKEY"): 1807 return self._parse_sortkey(compound=True) 1808 1809 if self._match_text_seq("SQL", "SECURITY"): 1810 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1811 1812 index = self._index 1813 key = self._parse_column() 1814 1815 if not self._match(TokenType.EQ): 1816 self._retreat(index) 1817 return self._parse_sequence_properties() 1818 1819 return self.expression( 1820 exp.Property, 1821 this=key.to_dot() if isinstance(key, exp.Column) else key, 1822 value=self._parse_bitwise() or self._parse_var(any_token=True), 1823 ) 1824 1825 def _parse_stored(self) -> exp.FileFormatProperty: 1826 self._match(TokenType.ALIAS) 1827 1828 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1829 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1830 1831 return self.expression( 1832 exp.FileFormatProperty, 1833 this=( 1834 self.expression( 1835 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1836 ) 1837 if input_format or output_format 1838 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 1839 ), 1840 ) 1841 1842 def _parse_unquoted_field(self): 1843 field = self._parse_field() 1844 if isinstance(field, exp.Identifier) and not field.quoted: 1845 field = exp.var(field) 1846 1847 return field 1848 1849 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 1850 self._match(TokenType.EQ) 1851 
self._match(TokenType.ALIAS) 1852 1853 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 1854 1855 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1856 properties = [] 1857 while True: 1858 if before: 1859 prop = self._parse_property_before() 1860 else: 1861 prop = self._parse_property() 1862 if not prop: 1863 break 1864 for p in ensure_list(prop): 1865 properties.append(p) 1866 1867 if properties: 1868 return self.expression(exp.Properties, expressions=properties) 1869 1870 return None 1871 1872 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1873 return self.expression( 1874 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1875 ) 1876 1877 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 1878 if self._index >= 2: 1879 pre_volatile_token = self._tokens[self._index - 2] 1880 else: 1881 pre_volatile_token = None 1882 1883 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 1884 return exp.VolatileProperty() 1885 1886 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1887 1888 def _parse_system_versioning_property(self) -> exp.WithSystemVersioningProperty: 1889 self._match_pair(TokenType.EQ, TokenType.ON) 1890 1891 prop = self.expression(exp.WithSystemVersioningProperty) 1892 if self._match(TokenType.L_PAREN): 1893 self._match_text_seq("HISTORY_TABLE", "=") 1894 prop.set("this", self._parse_table_parts()) 1895 1896 if self._match(TokenType.COMMA): 1897 self._match_text_seq("DATA_CONSISTENCY_CHECK", "=") 1898 prop.set("expression", self._advance_any() and self._prev.text.upper()) 1899 1900 self._match_r_paren() 1901 1902 return prop 1903 1904 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 1905 if self._match(TokenType.L_PAREN, advance=False): 1906 return self._parse_wrapped_properties() 1907 1908 if self._match_text_seq("JOURNAL"): 1909 return self._parse_withjournaltable() 1910 1911 if self._match_texts(self.VIEW_ATTRIBUTES): 1912 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 1913 1914 if self._match_text_seq("DATA"): 1915 return self._parse_withdata(no=False) 1916 elif self._match_text_seq("NO", "DATA"): 1917 return self._parse_withdata(no=True) 1918 1919 if not self._next: 1920 return None 1921 1922 return self._parse_withisolatedloading() 1923 1924 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1925 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 1926 self._match(TokenType.EQ) 1927 1928 user = self._parse_id_var() 1929 self._match(TokenType.PARAMETER) 1930 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1931 1932 if not user or not host: 1933 return None 1934 1935 return exp.DefinerProperty(this=f"{user}@{host}") 1936 1937 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 1938 self._match(TokenType.TABLE) 1939 self._match(TokenType.EQ) 1940 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1941 1942 def _parse_log(self, no: bool = False) -> exp.LogProperty: 1943 return self.expression(exp.LogProperty, no=no) 1944 1945 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 1946 return self.expression(exp.JournalProperty, **kwargs) 1947 1948 def _parse_checksum(self) -> exp.ChecksumProperty: 1949 self._match(TokenType.EQ) 1950 1951 on = None 1952 if self._match(TokenType.ON): 1953 on = True 
1954 elif self._match_text_seq("OFF"): 1955 on = False 1956 1957 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 1958 1959 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 1960 return self.expression( 1961 exp.Cluster, 1962 expressions=( 1963 self._parse_wrapped_csv(self._parse_ordered) 1964 if wrapped 1965 else self._parse_csv(self._parse_ordered) 1966 ), 1967 ) 1968 1969 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 1970 self._match_text_seq("BY") 1971 1972 self._match_l_paren() 1973 expressions = self._parse_csv(self._parse_column) 1974 self._match_r_paren() 1975 1976 if self._match_text_seq("SORTED", "BY"): 1977 self._match_l_paren() 1978 sorted_by = self._parse_csv(self._parse_ordered) 1979 self._match_r_paren() 1980 else: 1981 sorted_by = None 1982 1983 self._match(TokenType.INTO) 1984 buckets = self._parse_number() 1985 self._match_text_seq("BUCKETS") 1986 1987 return self.expression( 1988 exp.ClusteredByProperty, 1989 expressions=expressions, 1990 sorted_by=sorted_by, 1991 buckets=buckets, 1992 ) 1993 1994 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 1995 if not self._match_text_seq("GRANTS"): 1996 self._retreat(self._index - 1) 1997 return None 1998 1999 return self.expression(exp.CopyGrantsProperty) 2000 2001 def _parse_freespace(self) -> exp.FreespaceProperty: 2002 self._match(TokenType.EQ) 2003 return self.expression( 2004 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2005 ) 2006 2007 def _parse_mergeblockratio( 2008 self, no: bool = False, default: bool = False 2009 ) -> exp.MergeBlockRatioProperty: 2010 if self._match(TokenType.EQ): 2011 return self.expression( 2012 exp.MergeBlockRatioProperty, 2013 this=self._parse_number(), 2014 percent=self._match(TokenType.PERCENT), 2015 ) 2016 2017 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2018 2019 def _parse_datablocksize( 2020 self, 2021 default: t.Optional[bool] = None, 2022 minimum: t.Optional[bool] = None, 2023 maximum: t.Optional[bool] = None, 2024 ) -> exp.DataBlocksizeProperty: 2025 self._match(TokenType.EQ) 2026 size = self._parse_number() 2027 2028 units = None 2029 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2030 units = self._prev.text 2031 2032 return self.expression( 2033 exp.DataBlocksizeProperty, 2034 size=size, 2035 units=units, 2036 default=default, 2037 minimum=minimum, 2038 maximum=maximum, 2039 ) 2040 2041 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2042 self._match(TokenType.EQ) 2043 always = self._match_text_seq("ALWAYS") 2044 manual = self._match_text_seq("MANUAL") 2045 never = self._match_text_seq("NEVER") 2046 default = self._match_text_seq("DEFAULT") 2047 2048 autotemp = None 2049 if self._match_text_seq("AUTOTEMP"): 2050 autotemp = self._parse_schema() 2051 2052 return self.expression( 2053 exp.BlockCompressionProperty, 2054 always=always, 2055 manual=manual, 2056 never=never, 2057 default=default, 2058 autotemp=autotemp, 2059 ) 2060 2061 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2062 index = self._index 2063 no = self._match_text_seq("NO") 2064 concurrent = self._match_text_seq("CONCURRENT") 2065 2066 if not self._match_text_seq("ISOLATED", "LOADING"): 2067 self._retreat(index) 2068 return None 2069 2070 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2071 return self.expression( 2072 exp.IsolatedLoadingProperty, no=no, 
concurrent=concurrent, target=target 2073 ) 2074 2075 def _parse_locking(self) -> exp.LockingProperty: 2076 if self._match(TokenType.TABLE): 2077 kind = "TABLE" 2078 elif self._match(TokenType.VIEW): 2079 kind = "VIEW" 2080 elif self._match(TokenType.ROW): 2081 kind = "ROW" 2082 elif self._match_text_seq("DATABASE"): 2083 kind = "DATABASE" 2084 else: 2085 kind = None 2086 2087 if kind in ("DATABASE", "TABLE", "VIEW"): 2088 this = self._parse_table_parts() 2089 else: 2090 this = None 2091 2092 if self._match(TokenType.FOR): 2093 for_or_in = "FOR" 2094 elif self._match(TokenType.IN): 2095 for_or_in = "IN" 2096 else: 2097 for_or_in = None 2098 2099 if self._match_text_seq("ACCESS"): 2100 lock_type = "ACCESS" 2101 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2102 lock_type = "EXCLUSIVE" 2103 elif self._match_text_seq("SHARE"): 2104 lock_type = "SHARE" 2105 elif self._match_text_seq("READ"): 2106 lock_type = "READ" 2107 elif self._match_text_seq("WRITE"): 2108 lock_type = "WRITE" 2109 elif self._match_text_seq("CHECKSUM"): 2110 lock_type = "CHECKSUM" 2111 else: 2112 lock_type = None 2113 2114 override = self._match_text_seq("OVERRIDE") 2115 2116 return self.expression( 2117 exp.LockingProperty, 2118 this=this, 2119 kind=kind, 2120 for_or_in=for_or_in, 2121 lock_type=lock_type, 2122 override=override, 2123 ) 2124 2125 def _parse_partition_by(self) -> t.List[exp.Expression]: 2126 if self._match(TokenType.PARTITION_BY): 2127 return self._parse_csv(self._parse_conjunction) 2128 return [] 2129 2130 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2131 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2132 if self._match_text_seq("MINVALUE"): 2133 return exp.var("MINVALUE") 2134 if self._match_text_seq("MAXVALUE"): 2135 return exp.var("MAXVALUE") 2136 return self._parse_bitwise() 2137 2138 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2139 expression = None 2140 from_expressions = None 2141 to_expressions = None 2142 2143 if self._match(TokenType.IN): 2144 this = self._parse_wrapped_csv(self._parse_bitwise) 2145 elif self._match(TokenType.FROM): 2146 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2147 self._match_text_seq("TO") 2148 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2149 elif self._match_text_seq("WITH", "(", "MODULUS"): 2150 this = self._parse_number() 2151 self._match_text_seq(",", "REMAINDER") 2152 expression = self._parse_number() 2153 self._match_r_paren() 2154 else: 2155 self.raise_error("Failed to parse partition bound spec.") 2156 2157 return self.expression( 2158 exp.PartitionBoundSpec, 2159 this=this, 2160 expression=expression, 2161 from_expressions=from_expressions, 2162 to_expressions=to_expressions, 2163 ) 2164 2165 # https://www.postgresql.org/docs/current/sql-createtable.html 2166 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2167 if not self._match_text_seq("OF"): 2168 self._retreat(self._index - 1) 2169 return None 2170 2171 this = self._parse_table(schema=True) 2172 2173 if self._match(TokenType.DEFAULT): 2174 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2175 elif self._match_text_seq("FOR", "VALUES"): 2176 expression = self._parse_partition_bound_spec() 2177 else: 2178 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2179 2180 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2181 2182 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2183 
self._match(TokenType.EQ) 2184 return self.expression( 2185 exp.PartitionedByProperty, 2186 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2187 ) 2188 2189 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2190 if self._match_text_seq("AND", "STATISTICS"): 2191 statistics = True 2192 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2193 statistics = False 2194 else: 2195 statistics = None 2196 2197 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2198 2199 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2200 if self._match_text_seq("SQL"): 2201 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2202 return None 2203 2204 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2205 if self._match_text_seq("SQL", "DATA"): 2206 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2207 return None 2208 2209 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2210 if self._match_text_seq("PRIMARY", "INDEX"): 2211 return exp.NoPrimaryIndexProperty() 2212 if self._match_text_seq("SQL"): 2213 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2214 return None 2215 2216 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2217 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2218 return exp.OnCommitProperty() 2219 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2220 return exp.OnCommitProperty(delete=True) 2221 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2222 2223 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2224 if self._match_text_seq("SQL", "DATA"): 2225 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2226 return None 2227 2228 def _parse_distkey(self) -> exp.DistKeyProperty: 2229 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2230 2231 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2232 table = self._parse_table(schema=True) 2233 2234 options = [] 2235 while self._match_texts(("INCLUDING", "EXCLUDING")): 2236 this = self._prev.text.upper() 2237 2238 id_var = self._parse_id_var() 2239 if not id_var: 2240 return None 2241 2242 options.append( 2243 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2244 ) 2245 2246 return self.expression(exp.LikeProperty, this=table, expressions=options) 2247 2248 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2249 return self.expression( 2250 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2251 ) 2252 2253 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2254 self._match(TokenType.EQ) 2255 return self.expression( 2256 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2257 ) 2258 2259 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2260 self._match_text_seq("WITH", "CONNECTION") 2261 return self.expression( 2262 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2263 ) 2264 2265 def _parse_returns(self) -> exp.ReturnsProperty: 2266 value: t.Optional[exp.Expression] 2267 is_table = self._match(TokenType.TABLE) 2268 2269 if is_table: 2270 if self._match(TokenType.LT): 2271 value = self.expression( 2272 exp.Schema, 2273 this="TABLE", 2274 expressions=self._parse_csv(self._parse_struct_types), 2275 ) 2276 if not 
self._match(TokenType.GT): 2277 self.raise_error("Expecting >") 2278 else: 2279 value = self._parse_schema(exp.var("TABLE")) 2280 else: 2281 value = self._parse_types() 2282 2283 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table) 2284 2285 def _parse_describe(self) -> exp.Describe: 2286 kind = self._match_set(self.CREATABLES) and self._prev.text 2287 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2288 if self._match(TokenType.DOT): 2289 style = None 2290 self._retreat(self._index - 2) 2291 this = self._parse_table(schema=True) 2292 properties = self._parse_properties() 2293 expressions = properties.expressions if properties else None 2294 return self.expression( 2295 exp.Describe, this=this, style=style, kind=kind, expressions=expressions 2296 ) 2297 2298 def _parse_insert(self) -> exp.Insert: 2299 comments = ensure_list(self._prev_comments) 2300 hint = self._parse_hint() 2301 overwrite = self._match(TokenType.OVERWRITE) 2302 ignore = self._match(TokenType.IGNORE) 2303 local = self._match_text_seq("LOCAL") 2304 alternative = None 2305 is_function = None 2306 2307 if self._match_text_seq("DIRECTORY"): 2308 this: t.Optional[exp.Expression] = self.expression( 2309 exp.Directory, 2310 this=self._parse_var_or_string(), 2311 local=local, 2312 row_format=self._parse_row_format(match_row=True), 2313 ) 2314 else: 2315 if self._match(TokenType.OR): 2316 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2317 2318 self._match(TokenType.INTO) 2319 comments += ensure_list(self._prev_comments) 2320 self._match(TokenType.TABLE) 2321 is_function = self._match(TokenType.FUNCTION) 2322 2323 this = ( 2324 self._parse_table(schema=True, parse_partition=True) 2325 if not is_function 2326 else self._parse_function() 2327 ) 2328 2329 returning = self._parse_returning() 2330 2331 return self.expression( 2332 exp.Insert, 2333 comments=comments, 2334 hint=hint, 2335 is_function=is_function, 2336 this=this, 2337 stored=self._match_text_seq("STORED") and self._parse_stored(), 2338 by_name=self._match_text_seq("BY", "NAME"), 2339 exists=self._parse_exists(), 2340 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 2341 and self._parse_conjunction(), 2342 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2343 conflict=self._parse_on_conflict(), 2344 returning=returning or self._parse_returning(), 2345 overwrite=overwrite, 2346 alternative=alternative, 2347 ignore=ignore, 2348 ) 2349 2350 def _parse_kill(self) -> exp.Kill: 2351 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2352 2353 return self.expression( 2354 exp.Kill, 2355 this=self._parse_primary(), 2356 kind=kind, 2357 ) 2358 2359 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2360 conflict = self._match_text_seq("ON", "CONFLICT") 2361 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2362 2363 if not conflict and not duplicate: 2364 return None 2365 2366 conflict_keys = None 2367 constraint = None 2368 2369 if conflict: 2370 if self._match_text_seq("ON", "CONSTRAINT"): 2371 constraint = self._parse_id_var() 2372 elif self._match(TokenType.L_PAREN): 2373 conflict_keys = self._parse_csv(self._parse_id_var) 2374 self._match_r_paren() 2375 2376 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2377 if self._prev.token_type == TokenType.UPDATE: 2378 self._match(TokenType.SET) 2379 expressions = self._parse_csv(self._parse_equality) 2380 else: 2381 expressions = None 
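# Editor's note -- a hedged usage sketch, added for illustration (not in the
# upstream source). Both the Postgres ON CONFLICT and MySQL ON DUPLICATE KEY
# forms matched above funnel into the exp.OnConflict node built below, which
# _parse_insert stores under the insert's "conflict" arg:
#
#   >>> import sqlglot
#   >>> ins = sqlglot.parse_one(
#   ...     "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO NOTHING",
#   ...     read="postgres",
#   ... )
#   >>> type(ins.args["conflict"]).__name__
#   'OnConflict'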
2382 2383 return self.expression( 2384 exp.OnConflict, 2385 duplicate=duplicate, 2386 expressions=expressions, 2387 action=action, 2388 conflict_keys=conflict_keys, 2389 constraint=constraint, 2390 ) 2391 2392 def _parse_returning(self) -> t.Optional[exp.Returning]: 2393 if not self._match(TokenType.RETURNING): 2394 return None 2395 return self.expression( 2396 exp.Returning, 2397 expressions=self._parse_csv(self._parse_expression), 2398 into=self._match(TokenType.INTO) and self._parse_table_part(), 2399 ) 2400 2401 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2402 if not self._match(TokenType.FORMAT): 2403 return None 2404 return self._parse_row_format() 2405 2406 def _parse_row_format( 2407 self, match_row: bool = False 2408 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2409 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2410 return None 2411 2412 if self._match_text_seq("SERDE"): 2413 this = self._parse_string() 2414 2415 serde_properties = None 2416 if self._match(TokenType.SERDE_PROPERTIES): 2417 serde_properties = self.expression( 2418 exp.SerdeProperties, expressions=self._parse_wrapped_properties() 2419 ) 2420 2421 return self.expression( 2422 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2423 ) 2424 2425 self._match_text_seq("DELIMITED") 2426 2427 kwargs = {} 2428 2429 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2430 kwargs["fields"] = self._parse_string() 2431 if self._match_text_seq("ESCAPED", "BY"): 2432 kwargs["escaped"] = self._parse_string() 2433 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2434 kwargs["collection_items"] = self._parse_string() 2435 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2436 kwargs["map_keys"] = self._parse_string() 2437 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2438 kwargs["lines"] = self._parse_string() 2439 if self._match_text_seq("NULL", "DEFINED", "AS"): 2440 kwargs["null"] = self._parse_string() 2441 2442 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2443 2444 def _parse_load(self) -> exp.LoadData | exp.Command: 2445 if self._match_text_seq("DATA"): 2446 local = self._match_text_seq("LOCAL") 2447 self._match_text_seq("INPATH") 2448 inpath = self._parse_string() 2449 overwrite = self._match(TokenType.OVERWRITE) 2450 self._match_pair(TokenType.INTO, TokenType.TABLE) 2451 2452 return self.expression( 2453 exp.LoadData, 2454 this=self._parse_table(schema=True), 2455 local=local, 2456 overwrite=overwrite, 2457 inpath=inpath, 2458 partition=self._parse_partition(), 2459 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2460 serde=self._match_text_seq("SERDE") and self._parse_string(), 2461 ) 2462 return self._parse_as_command(self._prev) 2463 2464 def _parse_delete(self) -> exp.Delete: 2465 # This handles MySQL's "Multiple-Table Syntax" 2466 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2467 tables = None 2468 comments = self._prev_comments 2469 if not self._match(TokenType.FROM, advance=False): 2470 tables = self._parse_csv(self._parse_table) or None 2471 2472 returning = self._parse_returning() 2473 2474 return self.expression( 2475 exp.Delete, 2476 comments=comments, 2477 tables=tables, 2478 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2479 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2480 where=self._parse_where(), 2481 returning=returning 
or self._parse_returning(), 2482 limit=self._parse_limit(), 2483 ) 2484 2485 def _parse_update(self) -> exp.Update: 2486 comments = self._prev_comments 2487 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2488 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2489 returning = self._parse_returning() 2490 return self.expression( 2491 exp.Update, 2492 comments=comments, 2493 **{ # type: ignore 2494 "this": this, 2495 "expressions": expressions, 2496 "from": self._parse_from(joins=True), 2497 "where": self._parse_where(), 2498 "returning": returning or self._parse_returning(), 2499 "order": self._parse_order(), 2500 "limit": self._parse_limit(), 2501 }, 2502 ) 2503 2504 def _parse_uncache(self) -> exp.Uncache: 2505 if not self._match(TokenType.TABLE): 2506 self.raise_error("Expecting TABLE after UNCACHE") 2507 2508 return self.expression( 2509 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2510 ) 2511 2512 def _parse_cache(self) -> exp.Cache: 2513 lazy = self._match_text_seq("LAZY") 2514 self._match(TokenType.TABLE) 2515 table = self._parse_table(schema=True) 2516 2517 options = [] 2518 if self._match_text_seq("OPTIONS"): 2519 self._match_l_paren() 2520 k = self._parse_string() 2521 self._match(TokenType.EQ) 2522 v = self._parse_string() 2523 options = [k, v] 2524 self._match_r_paren() 2525 2526 self._match(TokenType.ALIAS) 2527 return self.expression( 2528 exp.Cache, 2529 this=table, 2530 lazy=lazy, 2531 options=options, 2532 expression=self._parse_select(nested=True), 2533 ) 2534 2535 def _parse_partition(self) -> t.Optional[exp.Partition]: 2536 if not self._match(TokenType.PARTITION): 2537 return None 2538 2539 return self.expression( 2540 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction) 2541 ) 2542 2543 def _parse_value(self) -> t.Optional[exp.Tuple]: 2544 if self._match(TokenType.L_PAREN): 2545 expressions = self._parse_csv(self._parse_expression) 2546 self._match_r_paren() 2547 return self.expression(exp.Tuple, expressions=expressions) 2548 2549 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
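# Editor's note (added): in that bare form, each expression parsed below is
# wrapped in a single-element exp.Tuple, so every row of an exp.Values node is
# uniformly a Tuple whether or not the dialect requires parentheses.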
2550 expression = self._parse_expression() 2551 if expression: 2552 return self.expression(exp.Tuple, expressions=[expression]) 2553 return None 2554 2555 def _parse_projections(self) -> t.List[exp.Expression]: 2556 return self._parse_expressions() 2557 2558 def _parse_select( 2559 self, 2560 nested: bool = False, 2561 table: bool = False, 2562 parse_subquery_alias: bool = True, 2563 parse_set_operation: bool = True, 2564 ) -> t.Optional[exp.Expression]: 2565 cte = self._parse_with() 2566 2567 if cte: 2568 this = self._parse_statement() 2569 2570 if not this: 2571 self.raise_error("Failed to parse any statement following CTE") 2572 return cte 2573 2574 if "with" in this.arg_types: 2575 this.set("with", cte) 2576 else: 2577 self.raise_error(f"{this.key} does not support CTE") 2578 this = cte 2579 2580 return this 2581 2582 # duckdb supports leading with FROM x 2583 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2584 2585 if self._match(TokenType.SELECT): 2586 comments = self._prev_comments 2587 2588 hint = self._parse_hint() 2589 all_ = self._match(TokenType.ALL) 2590 distinct = self._match_set(self.DISTINCT_TOKENS) 2591 2592 kind = ( 2593 self._match(TokenType.ALIAS) 2594 and self._match_texts(("STRUCT", "VALUE")) 2595 and self._prev.text.upper() 2596 ) 2597 2598 if distinct: 2599 distinct = self.expression( 2600 exp.Distinct, 2601 on=self._parse_value() if self._match(TokenType.ON) else None, 2602 ) 2603 2604 if all_ and distinct: 2605 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2606 2607 limit = self._parse_limit(top=True) 2608 projections = self._parse_projections() 2609 2610 this = self.expression( 2611 exp.Select, 2612 kind=kind, 2613 hint=hint, 2614 distinct=distinct, 2615 expressions=projections, 2616 limit=limit, 2617 ) 2618 this.comments = comments 2619 2620 into = self._parse_into() 2621 if into: 2622 this.set("into", into) 2623 2624 if not from_: 2625 from_ = self._parse_from() 2626 2627 if from_: 2628 this.set("from", from_) 2629 2630 this = self._parse_query_modifiers(this) 2631 elif (table or nested) and self._match(TokenType.L_PAREN): 2632 if self._match(TokenType.PIVOT): 2633 this = self._parse_simplified_pivot() 2634 elif self._match(TokenType.FROM): 2635 this = exp.select("*").from_( 2636 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2637 ) 2638 else: 2639 this = ( 2640 self._parse_table() 2641 if table 2642 else self._parse_select(nested=True, parse_set_operation=False) 2643 ) 2644 this = self._parse_query_modifiers(self._parse_set_operations(this)) 2645 2646 self._match_r_paren() 2647 2648 # We return early here so that the UNION isn't attached to the subquery by the 2649 # following call to _parse_set_operations, but instead becomes the parent node 2650 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 2651 elif self._match(TokenType.VALUES, advance=False): 2652 this = self._parse_derived_table_values() 2653 elif from_: 2654 this = exp.select("*").from_(from_.this, copy=False) 2655 else: 2656 this = None 2657 2658 if parse_set_operation: 2659 return self._parse_set_operations(this) 2660 return this 2661 2662 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 2663 if not skip_with_token and not self._match(TokenType.WITH): 2664 return None 2665 2666 comments = self._prev_comments 2667 recursive = self._match(TokenType.RECURSIVE) 2668 2669 expressions = [] 2670 while True: 2671 expressions.append(self._parse_cte()) 2672 2673 if not 
self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 2674 break 2675 else: 2676 self._match(TokenType.WITH) 2677 2678 return self.expression( 2679 exp.With, comments=comments, expressions=expressions, recursive=recursive 2680 ) 2681 2682 def _parse_cte(self) -> exp.CTE: 2683 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 2684 if not alias or not alias.this: 2685 self.raise_error("Expected CTE to have alias") 2686 2687 self._match(TokenType.ALIAS) 2688 2689 if self._match_text_seq("NOT", "MATERIALIZED"): 2690 materialized = False 2691 elif self._match_text_seq("MATERIALIZED"): 2692 materialized = True 2693 else: 2694 materialized = None 2695 2696 return self.expression( 2697 exp.CTE, 2698 this=self._parse_wrapped(self._parse_statement), 2699 alias=alias, 2700 materialized=materialized, 2701 ) 2702 2703 def _parse_table_alias( 2704 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2705 ) -> t.Optional[exp.TableAlias]: 2706 any_token = self._match(TokenType.ALIAS) 2707 alias = ( 2708 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2709 or self._parse_string_as_identifier() 2710 ) 2711 2712 index = self._index 2713 if self._match(TokenType.L_PAREN): 2714 columns = self._parse_csv(self._parse_function_parameter) 2715 self._match_r_paren() if columns else self._retreat(index) 2716 else: 2717 columns = None 2718 2719 if not alias and not columns: 2720 return None 2721 2722 return self.expression(exp.TableAlias, this=alias, columns=columns) 2723 2724 def _parse_subquery( 2725 self, this: t.Optional[exp.Expression], parse_alias: bool = True 2726 ) -> t.Optional[exp.Subquery]: 2727 if not this: 2728 return None 2729 2730 return self.expression( 2731 exp.Subquery, 2732 this=this, 2733 pivots=self._parse_pivots(), 2734 alias=self._parse_table_alias() if parse_alias else None, 2735 ) 2736 2737 def _implicit_unnests_to_explicit(self, this: E) -> E: 2738 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 2739 2740 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 2741 for i, join in enumerate(this.args.get("joins") or []): 2742 table = join.this 2743 normalized_table = table.copy() 2744 normalized_table.meta["maybe_column"] = True 2745 normalized_table = _norm(normalized_table, dialect=self.dialect) 2746 2747 if isinstance(table, exp.Table) and not join.args.get("on"): 2748 if normalized_table.parts[0].name in refs: 2749 table_as_column = table.to_column() 2750 unnest = exp.Unnest(expressions=[table_as_column]) 2751 2752 # Table.to_column creates a parent Alias node that we want to convert to 2753 # a TableAlias and attach to the Unnest, so it matches the parser's output 2754 if isinstance(table.args.get("alias"), exp.TableAlias): 2755 table_as_column.replace(table_as_column.this) 2756 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 2757 2758 table.replace(unnest) 2759 2760 refs.add(normalized_table.alias_or_name) 2761 2762 return this 2763 2764 def _parse_query_modifiers( 2765 self, this: t.Optional[exp.Expression] 2766 ) -> t.Optional[exp.Expression]: 2767 if isinstance(this, (exp.Query, exp.Table)): 2768 for join in self._parse_joins(): 2769 this.append("joins", join) 2770 for lateral in iter(self._parse_lateral, None): 2771 this.append("laterals", lateral) 2772 2773 while True: 2774 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 2775 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2776 key, expression = parser(self) 
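# Editor's note (added): each QUERY_MODIFIER_PARSERS entry returns a
# (key, expression) pair. The limit-specific block below splits an offset that
# was parsed as part of LIMIT <offset>, <count> into its own exp.Offset node
# and, when such an offset exists, moves any LIMIT ... BY expressions (a
# ClickHouse-style modifier) onto that node as well.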
2777 2778 if expression: 2779 this.set(key, expression) 2780 if key == "limit": 2781 offset = expression.args.pop("offset", None) 2782 2783 if offset: 2784 offset = exp.Offset(expression=offset) 2785 this.set("offset", offset) 2786 2787 limit_by_expressions = expression.expressions 2788 expression.set("expressions", None) 2789 offset.set("expressions", limit_by_expressions) 2790 continue 2791 break 2792 2793 if self.SUPPORTS_IMPLICIT_UNNEST and this and "from" in this.args: 2794 this = self._implicit_unnests_to_explicit(this) 2795 2796 return this 2797 2798 def _parse_hint(self) -> t.Optional[exp.Hint]: 2799 if self._match(TokenType.HINT): 2800 hints = [] 2801 for hint in iter( 2802 lambda: self._parse_csv( 2803 lambda: self._parse_function() or self._parse_var(upper=True) 2804 ), 2805 [], 2806 ): 2807 hints.extend(hint) 2808 2809 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2810 self.raise_error("Expected */ after HINT") 2811 2812 return self.expression(exp.Hint, expressions=hints) 2813 2814 return None 2815 2816 def _parse_into(self) -> t.Optional[exp.Into]: 2817 if not self._match(TokenType.INTO): 2818 return None 2819 2820 temp = self._match(TokenType.TEMPORARY) 2821 unlogged = self._match_text_seq("UNLOGGED") 2822 self._match(TokenType.TABLE) 2823 2824 return self.expression( 2825 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2826 ) 2827 2828 def _parse_from( 2829 self, joins: bool = False, skip_from_token: bool = False 2830 ) -> t.Optional[exp.From]: 2831 if not skip_from_token and not self._match(TokenType.FROM): 2832 return None 2833 2834 return self.expression( 2835 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2836 ) 2837 2838 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 2839 return self.expression( 2840 exp.MatchRecognizeMeasure, 2841 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 2842 this=self._parse_expression(), 2843 ) 2844 2845 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2846 if not self._match(TokenType.MATCH_RECOGNIZE): 2847 return None 2848 2849 self._match_l_paren() 2850 2851 partition = self._parse_partition_by() 2852 order = self._parse_order() 2853 2854 measures = ( 2855 self._parse_csv(self._parse_match_recognize_measure) 2856 if self._match_text_seq("MEASURES") 2857 else None 2858 ) 2859 2860 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2861 rows = exp.var("ONE ROW PER MATCH") 2862 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2863 text = "ALL ROWS PER MATCH" 2864 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2865 text += " SHOW EMPTY MATCHES" 2866 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2867 text += " OMIT EMPTY MATCHES" 2868 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2869 text += " WITH UNMATCHED ROWS" 2870 rows = exp.var(text) 2871 else: 2872 rows = None 2873 2874 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2875 text = "AFTER MATCH SKIP" 2876 if self._match_text_seq("PAST", "LAST", "ROW"): 2877 text += " PAST LAST ROW" 2878 elif self._match_text_seq("TO", "NEXT", "ROW"): 2879 text += " TO NEXT ROW" 2880 elif self._match_text_seq("TO", "FIRST"): 2881 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2882 elif self._match_text_seq("TO", "LAST"): 2883 text += f" TO LAST {self._advance_any().text}" # type: ignore 2884 after = exp.var(text) 2885 else: 2886 after = None 2887 2888 if self._match_text_seq("PATTERN"): 2889 
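# Editor's note (added): MATCH_RECOGNIZE patterns are regex-like rather than
# ordinary SQL expressions, so instead of parsing them the code below scans
# for the balancing right parenthesis and captures the raw SQL between the
# parens verbatim (via _find_sql), storing it as an exp.Var.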
self._match_l_paren() 2890 2891 if not self._curr: 2892 self.raise_error("Expecting )", self._curr) 2893 2894 paren = 1 2895 start = self._curr 2896 2897 while self._curr and paren > 0: 2898 if self._curr.token_type == TokenType.L_PAREN: 2899 paren += 1 2900 if self._curr.token_type == TokenType.R_PAREN: 2901 paren -= 1 2902 2903 end = self._prev 2904 self._advance() 2905 2906 if paren > 0: 2907 self.raise_error("Expecting )", self._curr) 2908 2909 pattern = exp.var(self._find_sql(start, end)) 2910 else: 2911 pattern = None 2912 2913 define = ( 2914 self._parse_csv(self._parse_name_as_expression) 2915 if self._match_text_seq("DEFINE") 2916 else None 2917 ) 2918 2919 self._match_r_paren() 2920 2921 return self.expression( 2922 exp.MatchRecognize, 2923 partition_by=partition, 2924 order=order, 2925 measures=measures, 2926 rows=rows, 2927 after=after, 2928 pattern=pattern, 2929 define=define, 2930 alias=self._parse_table_alias(), 2931 ) 2932 2933 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2934 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2935 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 2936 cross_apply = False 2937 2938 if cross_apply is not None: 2939 this = self._parse_select(table=True) 2940 view = None 2941 outer = None 2942 elif self._match(TokenType.LATERAL): 2943 this = self._parse_select(table=True) 2944 view = self._match(TokenType.VIEW) 2945 outer = self._match(TokenType.OUTER) 2946 else: 2947 return None 2948 2949 if not this: 2950 this = ( 2951 self._parse_unnest() 2952 or self._parse_function() 2953 or self._parse_id_var(any_token=False) 2954 ) 2955 2956 while self._match(TokenType.DOT): 2957 this = exp.Dot( 2958 this=this, 2959 expression=self._parse_function() or self._parse_id_var(any_token=False), 2960 ) 2961 2962 if view: 2963 table = self._parse_id_var(any_token=False) 2964 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2965 table_alias: t.Optional[exp.TableAlias] = self.expression( 2966 exp.TableAlias, this=table, columns=columns 2967 ) 2968 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 2969 # We move the alias from the lateral's child node to the lateral itself 2970 table_alias = this.args["alias"].pop() 2971 else: 2972 table_alias = self._parse_table_alias() 2973 2974 return self.expression( 2975 exp.Lateral, 2976 this=this, 2977 view=view, 2978 outer=outer, 2979 alias=table_alias, 2980 cross_apply=cross_apply, 2981 ) 2982 2983 def _parse_join_parts( 2984 self, 2985 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2986 return ( 2987 self._match_set(self.JOIN_METHODS) and self._prev, 2988 self._match_set(self.JOIN_SIDES) and self._prev, 2989 self._match_set(self.JOIN_KINDS) and self._prev, 2990 ) 2991 2992 def _parse_join( 2993 self, skip_join_token: bool = False, parse_bracket: bool = False 2994 ) -> t.Optional[exp.Join]: 2995 if self._match(TokenType.COMMA): 2996 return self.expression(exp.Join, this=self._parse_table()) 2997 2998 index = self._index 2999 method, side, kind = self._parse_join_parts() 3000 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3001 join = self._match(TokenType.JOIN) 3002 3003 if not skip_join_token and not join: 3004 self._retreat(index) 3005 kind = None 3006 method = None 3007 side = None 3008 3009 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3010 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3011 3012 if not skip_join_token and 
not join and not outer_apply and not cross_apply: 3013 return None 3014 3015 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3016 3017 if method: 3018 kwargs["method"] = method.text 3019 if side: 3020 kwargs["side"] = side.text 3021 if kind: 3022 kwargs["kind"] = kind.text 3023 if hint: 3024 kwargs["hint"] = hint 3025 3026 if self._match(TokenType.MATCH_CONDITION): 3027 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3028 3029 if self._match(TokenType.ON): 3030 kwargs["on"] = self._parse_conjunction() 3031 elif self._match(TokenType.USING): 3032 kwargs["using"] = self._parse_wrapped_id_vars() 3033 elif not isinstance(kwargs["this"], exp.Unnest) and not ( 3034 kind and kind.token_type == TokenType.CROSS 3035 ): 3036 index = self._index 3037 joins: t.Optional[list] = list(self._parse_joins()) 3038 3039 if joins and self._match(TokenType.ON): 3040 kwargs["on"] = self._parse_conjunction() 3041 elif joins and self._match(TokenType.USING): 3042 kwargs["using"] = self._parse_wrapped_id_vars() 3043 else: 3044 joins = None 3045 self._retreat(index) 3046 3047 kwargs["this"].set("joins", joins if joins else None) 3048 3049 comments = [c for token in (method, side, kind) if token for c in token.comments] 3050 return self.expression(exp.Join, comments=comments, **kwargs) 3051 3052 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3053 this = self._parse_conjunction() 3054 3055 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3056 return this 3057 3058 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3059 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3060 3061 return this 3062 3063 def _parse_index_params(self) -> exp.IndexParameters: 3064 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3065 3066 if self._match(TokenType.L_PAREN, advance=False): 3067 columns = self._parse_wrapped_csv(self._parse_with_operator) 3068 else: 3069 columns = None 3070 3071 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3072 partition_by = self._parse_partition_by() 3073 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3074 tablespace = ( 3075 self._parse_var(any_token=True) 3076 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3077 else None 3078 ) 3079 where = self._parse_where() 3080 3081 return self.expression( 3082 exp.IndexParameters, 3083 using=using, 3084 columns=columns, 3085 include=include, 3086 partition_by=partition_by, 3087 where=where, 3088 with_storage=with_storage, 3089 tablespace=tablespace, 3090 ) 3091 3092 def _parse_index( 3093 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3094 ) -> t.Optional[exp.Index]: 3095 if index or anonymous: 3096 unique = None 3097 primary = None 3098 amp = None 3099 3100 self._match(TokenType.ON) 3101 self._match(TokenType.TABLE) # hive 3102 table = self._parse_table_parts(schema=True) 3103 else: 3104 unique = self._match(TokenType.UNIQUE) 3105 primary = self._match_text_seq("PRIMARY") 3106 amp = self._match_text_seq("AMP") 3107 3108 if not self._match(TokenType.INDEX): 3109 return None 3110 3111 index = self._parse_id_var() 3112 table = None 3113 3114 params = self._parse_index_params() 3115 3116 return self.expression( 3117 exp.Index, 3118 this=index, 3119 table=table, 3120 unique=unique, 3121 primary=primary, 3122 amp=amp, 3123 params=params, 3124 ) 3125 3126 def _parse_table_hints(self) -> 
t.Optional[t.List[exp.Expression]]: 3127 hints: t.List[exp.Expression] = [] 3128 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3129 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3130 hints.append( 3131 self.expression( 3132 exp.WithTableHint, 3133 expressions=self._parse_csv( 3134 lambda: self._parse_function() or self._parse_var(any_token=True) 3135 ), 3136 ) 3137 ) 3138 self._match_r_paren() 3139 else: 3140 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3141 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3142 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3143 3144 self._match_texts(("INDEX", "KEY")) 3145 if self._match(TokenType.FOR): 3146 hint.set("target", self._advance_any() and self._prev.text.upper()) 3147 3148 hint.set("expressions", self._parse_wrapped_id_vars()) 3149 hints.append(hint) 3150 3151 return hints or None 3152 3153 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3154 return ( 3155 (not schema and self._parse_function(optional_parens=False)) 3156 or self._parse_id_var(any_token=False) 3157 or self._parse_string_as_identifier() 3158 or self._parse_placeholder() 3159 ) 3160 3161 def _parse_table_parts( 3162 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3163 ) -> exp.Table: 3164 catalog = None 3165 db = None 3166 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3167 3168 while self._match(TokenType.DOT): 3169 if catalog: 3170 # This allows nesting the table in arbitrarily many dot expressions if needed 3171 table = self.expression( 3172 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3173 ) 3174 else: 3175 catalog = db 3176 db = table 3177 # "" used for tsql FROM a..b case 3178 table = self._parse_table_part(schema=schema) or "" 3179 3180 if ( 3181 wildcard 3182 and self._is_connected() 3183 and (isinstance(table, exp.Identifier) or not table) 3184 and self._match(TokenType.STAR) 3185 ): 3186 if isinstance(table, exp.Identifier): 3187 table.args["this"] += "*" 3188 else: 3189 table = exp.Identifier(this="*") 3190 3191 # We bubble up comments from the Identifier to the Table 3192 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3193 3194 if is_db_reference: 3195 catalog = db 3196 db = table 3197 table = None 3198 3199 if not table and not is_db_reference: 3200 self.raise_error(f"Expected table name but got {self._curr}") 3201 if not db and is_db_reference: 3202 self.raise_error(f"Expected database name but got {self._curr}") 3203 3204 return self.expression( 3205 exp.Table, 3206 comments=comments, 3207 this=table, 3208 db=db, 3209 catalog=catalog, 3210 pivots=self._parse_pivots(), 3211 ) 3212 3213 def _parse_table( 3214 self, 3215 schema: bool = False, 3216 joins: bool = False, 3217 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3218 parse_bracket: bool = False, 3219 is_db_reference: bool = False, 3220 parse_partition: bool = False, 3221 ) -> t.Optional[exp.Expression]: 3222 lateral = self._parse_lateral() 3223 if lateral: 3224 return lateral 3225 3226 unnest = self._parse_unnest() 3227 if unnest: 3228 return unnest 3229 3230 values = self._parse_derived_table_values() 3231 if values: 3232 return values 3233 3234 subquery = self._parse_select(table=True) 3235 if subquery: 3236 if not subquery.args.get("pivots"): 3237 subquery.set("pivots", self._parse_pivots()) 3238 return subquery 3239 3240 bracket = parse_bracket and 
self._parse_bracket(None) 3241 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3242 3243 only = self._match(TokenType.ONLY) 3244 3245 this = t.cast( 3246 exp.Expression, 3247 bracket 3248 or self._parse_bracket( 3249 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3250 ), 3251 ) 3252 3253 if only: 3254 this.set("only", only) 3255 3256 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3257 self._match_text_seq("*") 3258 3259 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3260 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3261 this.set("partition", self._parse_partition()) 3262 3263 if schema: 3264 return self._parse_schema(this=this) 3265 3266 version = self._parse_version() 3267 3268 if version: 3269 this.set("version", version) 3270 3271 if self.dialect.ALIAS_POST_TABLESAMPLE: 3272 table_sample = self._parse_table_sample() 3273 3274 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3275 if alias: 3276 this.set("alias", alias) 3277 3278 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3279 return self.expression( 3280 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3281 ) 3282 3283 this.set("hints", self._parse_table_hints()) 3284 3285 if not this.args.get("pivots"): 3286 this.set("pivots", self._parse_pivots()) 3287 3288 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3289 table_sample = self._parse_table_sample() 3290 3291 if table_sample: 3292 table_sample.set("this", this) 3293 this = table_sample 3294 3295 if joins: 3296 for join in self._parse_joins(): 3297 this.append("joins", join) 3298 3299 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3300 this.set("ordinality", True) 3301 this.set("alias", self._parse_table_alias()) 3302 3303 return this 3304 3305 def _parse_version(self) -> t.Optional[exp.Version]: 3306 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3307 this = "TIMESTAMP" 3308 elif self._match(TokenType.VERSION_SNAPSHOT): 3309 this = "VERSION" 3310 else: 3311 return None 3312 3313 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3314 kind = self._prev.text.upper() 3315 start = self._parse_bitwise() 3316 self._match_texts(("TO", "AND")) 3317 end = self._parse_bitwise() 3318 expression: t.Optional[exp.Expression] = self.expression( 3319 exp.Tuple, expressions=[start, end] 3320 ) 3321 elif self._match_text_seq("CONTAINED", "IN"): 3322 kind = "CONTAINED IN" 3323 expression = self.expression( 3324 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3325 ) 3326 elif self._match(TokenType.ALL): 3327 kind = "ALL" 3328 expression = None 3329 else: 3330 self._match_text_seq("AS", "OF") 3331 kind = "AS OF" 3332 expression = self._parse_type() 3333 3334 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3335 3336 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 3337 if not self._match(TokenType.UNNEST): 3338 return None 3339 3340 expressions = self._parse_wrapped_csv(self._parse_equality) 3341 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3342 3343 alias = self._parse_table_alias() if with_alias else None 3344 3345 if alias: 3346 if self.dialect.UNNEST_COLUMN_ONLY: 3347 if alias.args.get("columns"): 3348 self.raise_error("Unexpected extra column alias in unnest.") 3349 3350 alias.set("columns", [alias.this]) 3351 alias.set("this", None) 3352 3353 columns = 
alias.args.get("columns") or [] 3354 if offset and len(expressions) < len(columns): 3355 offset = columns.pop() 3356 3357 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 3358 self._match(TokenType.ALIAS) 3359 offset = self._parse_id_var( 3360 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 3361 ) or exp.to_identifier("offset") 3362 3363 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 3364 3365 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 3366 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 3367 if not is_derived and not self._match_text_seq("VALUES"): 3368 return None 3369 3370 expressions = self._parse_csv(self._parse_value) 3371 alias = self._parse_table_alias() 3372 3373 if is_derived: 3374 self._match_r_paren() 3375 3376 return self.expression( 3377 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 3378 ) 3379 3380 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 3381 if not self._match(TokenType.TABLE_SAMPLE) and not ( 3382 as_modifier and self._match_text_seq("USING", "SAMPLE") 3383 ): 3384 return None 3385 3386 bucket_numerator = None 3387 bucket_denominator = None 3388 bucket_field = None 3389 percent = None 3390 size = None 3391 seed = None 3392 3393 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 3394 matched_l_paren = self._match(TokenType.L_PAREN) 3395 3396 if self.TABLESAMPLE_CSV: 3397 num = None 3398 expressions = self._parse_csv(self._parse_primary) 3399 else: 3400 expressions = None 3401 num = ( 3402 self._parse_factor() 3403 if self._match(TokenType.NUMBER, advance=False) 3404 else self._parse_primary() or self._parse_placeholder() 3405 ) 3406 3407 if self._match_text_seq("BUCKET"): 3408 bucket_numerator = self._parse_number() 3409 self._match_text_seq("OUT", "OF") 3410 bucket_denominator = self._parse_number() 3411 self._match(TokenType.ON) 3412 bucket_field = self._parse_field() 3413 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 3414 percent = num 3415 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3416 size = num 3417 else: 3418 percent = num 3419 3420 if matched_l_paren: 3421 self._match_r_paren() 3422 3423 if self._match(TokenType.L_PAREN): 3424 method = self._parse_var(upper=True) 3425 seed = self._match(TokenType.COMMA) and self._parse_number() 3426 self._match_r_paren() 3427 elif self._match_texts(("SEED", "REPEATABLE")): 3428 seed = self._parse_wrapped(self._parse_number) 3429 3430 if not method and self.DEFAULT_SAMPLING_METHOD: 3431 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3432 3433 return self.expression( 3434 exp.TableSample, 3435 expressions=expressions, 3436 method=method, 3437 bucket_numerator=bucket_numerator, 3438 bucket_denominator=bucket_denominator, 3439 bucket_field=bucket_field, 3440 percent=percent, 3441 size=size, 3442 seed=seed, 3443 ) 3444 3445 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3446 return list(iter(self._parse_pivot, None)) or None 3447 3448 def _parse_joins(self) -> t.Iterator[exp.Join]: 3449 return iter(self._parse_join, None) 3450 3451 # https://duckdb.org/docs/sql/statements/pivot 3452 def _parse_simplified_pivot(self) -> exp.Pivot: 3453 def _parse_on() -> t.Optional[exp.Expression]: 3454 this = self._parse_bitwise() 3455 return self._parse_in(this) if self._match(TokenType.IN) else this 3456 3457 this = self._parse_table() 3458 expressions =
self._match(TokenType.ON) and self._parse_csv(_parse_on) 3459 using = self._match(TokenType.USING) and self._parse_csv( 3460 lambda: self._parse_alias(self._parse_function()) 3461 ) 3462 group = self._parse_group() 3463 return self.expression( 3464 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3465 ) 3466 3467 def _parse_pivot_in(self) -> exp.In: 3468 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3469 this = self._parse_conjunction() 3470 3471 self._match(TokenType.ALIAS) 3472 alias = self._parse_field() 3473 if alias: 3474 return self.expression(exp.PivotAlias, this=this, alias=alias) 3475 3476 return this 3477 3478 value = self._parse_column() 3479 3480 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3481 self.raise_error("Expecting IN (") 3482 3483 aliased_expressions = self._parse_csv(_parse_aliased_expression) 3484 3485 self._match_r_paren() 3486 return self.expression(exp.In, this=value, expressions=aliased_expressions) 3487 3488 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3489 index = self._index 3490 include_nulls = None 3491 3492 if self._match(TokenType.PIVOT): 3493 unpivot = False 3494 elif self._match(TokenType.UNPIVOT): 3495 unpivot = True 3496 3497 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3498 if self._match_text_seq("INCLUDE", "NULLS"): 3499 include_nulls = True 3500 elif self._match_text_seq("EXCLUDE", "NULLS"): 3501 include_nulls = False 3502 else: 3503 return None 3504 3505 expressions = [] 3506 3507 if not self._match(TokenType.L_PAREN): 3508 self._retreat(index) 3509 return None 3510 3511 if unpivot: 3512 expressions = self._parse_csv(self._parse_column) 3513 else: 3514 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3515 3516 if not expressions: 3517 self.raise_error("Failed to parse PIVOT's aggregation list") 3518 3519 if not self._match(TokenType.FOR): 3520 self.raise_error("Expecting FOR") 3521 3522 field = self._parse_pivot_in() 3523 3524 self._match_r_paren() 3525 3526 pivot = self.expression( 3527 exp.Pivot, 3528 expressions=expressions, 3529 field=field, 3530 unpivot=unpivot, 3531 include_nulls=include_nulls, 3532 ) 3533 3534 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3535 pivot.set("alias", self._parse_table_alias()) 3536 3537 if not unpivot: 3538 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3539 3540 columns: t.List[exp.Expression] = [] 3541 for fld in pivot.args["field"].expressions: 3542 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3543 for name in names: 3544 if self.PREFIXED_PIVOT_COLUMNS: 3545 name = f"{name}_{field_name}" if name else field_name 3546 else: 3547 name = f"{field_name}_{name}" if name else field_name 3548 3549 columns.append(exp.to_identifier(name)) 3550 3551 pivot.set("columns", columns) 3552 3553 return pivot 3554 3555 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 3556 return [agg.alias for agg in aggregations] 3557 3558 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 3559 if not skip_where_token and not self._match(TokenType.PREWHERE): 3560 return None 3561 3562 return self.expression( 3563 exp.PreWhere, comments=self._prev_comments, this=self._parse_conjunction() 3564 ) 3565 3566 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 3567 if not skip_where_token and not 
self._match(TokenType.WHERE): 3568 return None 3569 3570 return self.expression( 3571 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 3572 ) 3573 3574 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 3575 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 3576 return None 3577 3578 elements: t.Dict[str, t.Any] = defaultdict(list) 3579 3580 if self._match(TokenType.ALL): 3581 elements["all"] = True 3582 elif self._match(TokenType.DISTINCT): 3583 elements["all"] = False 3584 3585 while True: 3586 expressions = self._parse_csv( 3587 lambda: None 3588 if self._match(TokenType.ROLLUP, advance=False) 3589 else self._parse_conjunction() 3590 ) 3591 if expressions: 3592 elements["expressions"].extend(expressions) 3593 3594 grouping_sets = self._parse_grouping_sets() 3595 if grouping_sets: 3596 elements["grouping_sets"].extend(grouping_sets) 3597 3598 rollup = None 3599 cube = None 3600 totals = None 3601 3602 index = self._index 3603 with_ = self._match(TokenType.WITH) 3604 if self._match(TokenType.ROLLUP): 3605 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 3606 elements["rollup"].extend(ensure_list(rollup)) 3607 3608 if self._match(TokenType.CUBE): 3609 cube = with_ or self._parse_wrapped_csv(self._parse_column) 3610 elements["cube"].extend(ensure_list(cube)) 3611 3612 if self._match_text_seq("TOTALS"): 3613 totals = True 3614 elements["totals"] = True # type: ignore 3615 3616 if not (grouping_sets or rollup or cube or totals): 3617 if with_: 3618 self._retreat(index) 3619 break 3620 3621 return self.expression(exp.Group, **elements) # type: ignore 3622 3623 def _parse_grouping_sets(self) -> t.Optional[t.List[exp.Expression]]: 3624 if not self._match(TokenType.GROUPING_SETS): 3625 return None 3626 3627 return self._parse_wrapped_csv(self._parse_grouping_set) 3628 3629 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 3630 if self._match(TokenType.L_PAREN): 3631 grouping_set = self._parse_csv(self._parse_column) 3632 self._match_r_paren() 3633 return self.expression(exp.Tuple, expressions=grouping_set) 3634 3635 return self._parse_column() 3636 3637 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 3638 if not skip_having_token and not self._match(TokenType.HAVING): 3639 return None 3640 return self.expression(exp.Having, this=self._parse_conjunction()) 3641 3642 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 3643 if not self._match(TokenType.QUALIFY): 3644 return None 3645 return self.expression(exp.Qualify, this=self._parse_conjunction()) 3646 3647 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 3648 if skip_start_token: 3649 start = None 3650 elif self._match(TokenType.START_WITH): 3651 start = self._parse_conjunction() 3652 else: 3653 return None 3654 3655 self._match(TokenType.CONNECT_BY) 3656 nocycle = self._match_text_seq("NOCYCLE") 3657 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 3658 exp.Prior, this=self._parse_bitwise() 3659 ) 3660 connect = self._parse_conjunction() 3661 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 3662 3663 if not start and self._match(TokenType.START_WITH): 3664 start = self._parse_conjunction() 3665 3666 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 3667 3668 def _parse_name_as_expression(self) -> exp.Alias: 3669 return self.expression( 3670 exp.Alias, 3671 alias=self._parse_id_var(any_token=True), 3672 
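# Editor's note (added): the alias above is the NAME half; the `this`
# argument below holds the expression following the AS token. This helper
# backs the MATCH_RECOGNIZE DEFINE clause and the INTERPOLATE clause of
# ORDER BY, both parsed elsewhere in this file.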
this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 3673 ) 3674 3675 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 3676 if self._match_text_seq("INTERPOLATE"): 3677 return self._parse_wrapped_csv(self._parse_name_as_expression) 3678 return None 3679 3680 def _parse_order( 3681 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 3682 ) -> t.Optional[exp.Expression]: 3683 siblings = None 3684 if not skip_order_token and not self._match(TokenType.ORDER_BY): 3685 if not self._match(TokenType.ORDER_SIBLINGS_BY): 3686 return this 3687 3688 siblings = True 3689 3690 return self.expression( 3691 exp.Order, 3692 this=this, 3693 expressions=self._parse_csv(self._parse_ordered), 3694 interpolate=self._parse_interpolate(), 3695 siblings=siblings, 3696 ) 3697 3698 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 3699 if not self._match(token): 3700 return None 3701 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 3702 3703 def _parse_ordered( 3704 self, parse_method: t.Optional[t.Callable] = None 3705 ) -> t.Optional[exp.Ordered]: 3706 this = parse_method() if parse_method else self._parse_conjunction() 3707 if not this: 3708 return None 3709 3710 asc = self._match(TokenType.ASC) 3711 desc = self._match(TokenType.DESC) or (asc and False) 3712 3713 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 3714 is_nulls_last = self._match_text_seq("NULLS", "LAST") 3715 3716 nulls_first = is_nulls_first or False 3717 explicitly_null_ordered = is_nulls_first or is_nulls_last 3718 3719 if ( 3720 not explicitly_null_ordered 3721 and ( 3722 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 3723 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 3724 ) 3725 and self.dialect.NULL_ORDERING != "nulls_are_last" 3726 ): 3727 nulls_first = True 3728 3729 if self._match_text_seq("WITH", "FILL"): 3730 with_fill = self.expression( 3731 exp.WithFill, 3732 **{ # type: ignore 3733 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 3734 "to": self._match_text_seq("TO") and self._parse_bitwise(), 3735 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 3736 }, 3737 ) 3738 else: 3739 with_fill = None 3740 3741 return self.expression( 3742 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 3743 ) 3744 3745 def _parse_limit( 3746 self, 3747 this: t.Optional[exp.Expression] = None, 3748 top: bool = False, 3749 skip_limit_token: bool = False, 3750 ) -> t.Optional[exp.Expression]: 3751 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 3752 comments = self._prev_comments 3753 if top: 3754 limit_paren = self._match(TokenType.L_PAREN) 3755 expression = self._parse_term() if limit_paren else self._parse_number() 3756 3757 if limit_paren: 3758 self._match_r_paren() 3759 else: 3760 expression = self._parse_term() 3761 3762 if self._match(TokenType.COMMA): 3763 offset = expression 3764 expression = self._parse_term() 3765 else: 3766 offset = None 3767 3768 limit_exp = self.expression( 3769 exp.Limit, 3770 this=this, 3771 expression=expression, 3772 offset=offset, 3773 comments=comments, 3774 expressions=self._parse_limit_by(), 3775 ) 3776 3777 return limit_exp 3778 3779 if self._match(TokenType.FETCH): 3780 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 3781 direction = self._prev.text.upper() if direction else "FIRST" 3782 3783 count = self._parse_field(tokens=self.FETCH_TOKENS) 3784 percent = 
self._match(TokenType.PERCENT) 3785 3786 self._match_set((TokenType.ROW, TokenType.ROWS)) 3787 3788 only = self._match_text_seq("ONLY") 3789 with_ties = self._match_text_seq("WITH", "TIES") 3790 3791 if only and with_ties: 3792 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 3793 3794 return self.expression( 3795 exp.Fetch, 3796 direction=direction, 3797 count=count, 3798 percent=percent, 3799 with_ties=with_ties, 3800 ) 3801 3802 return this 3803 3804 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3805 if not self._match(TokenType.OFFSET): 3806 return this 3807 3808 count = self._parse_term() 3809 self._match_set((TokenType.ROW, TokenType.ROWS)) 3810 3811 return self.expression( 3812 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 3813 ) 3814 3815 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 3816 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 3817 3818 def _parse_locks(self) -> t.List[exp.Lock]: 3819 locks = [] 3820 while True: 3821 if self._match_text_seq("FOR", "UPDATE"): 3822 update = True 3823 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 3824 "LOCK", "IN", "SHARE", "MODE" 3825 ): 3826 update = False 3827 else: 3828 break 3829 3830 expressions = None 3831 if self._match_text_seq("OF"): 3832 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 3833 3834 wait: t.Optional[bool | exp.Expression] = None 3835 if self._match_text_seq("NOWAIT"): 3836 wait = True 3837 elif self._match_text_seq("WAIT"): 3838 wait = self._parse_primary() 3839 elif self._match_text_seq("SKIP", "LOCKED"): 3840 wait = False 3841 3842 locks.append( 3843 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 3844 ) 3845 3846 return locks 3847 3848 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3849 while this and self._match_set(self.SET_OPERATIONS): 3850 token_type = self._prev.token_type 3851 3852 if token_type == TokenType.UNION: 3853 operation = exp.Union 3854 elif token_type == TokenType.EXCEPT: 3855 operation = exp.Except 3856 else: 3857 operation = exp.Intersect 3858 3859 comments = self._prev.comments 3860 distinct = self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL) 3861 by_name = self._match_text_seq("BY", "NAME") 3862 expression = self._parse_select(nested=True, parse_set_operation=False) 3863 3864 this = self.expression( 3865 operation, 3866 comments=comments, 3867 this=this, 3868 distinct=distinct, 3869 by_name=by_name, 3870 expression=expression, 3871 ) 3872 3873 if isinstance(this, exp.Union) and self.MODIFIERS_ATTACHED_TO_UNION: 3874 expression = this.expression 3875 3876 if expression: 3877 for arg in self.UNION_MODIFIERS: 3878 expr = expression.args.get(arg) 3879 if expr: 3880 this.set(arg, expr.pop()) 3881 3882 return this 3883 3884 def _parse_expression(self) -> t.Optional[exp.Expression]: 3885 return self._parse_alias(self._parse_conjunction()) 3886 3887 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 3888 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 3889 3890 def _parse_equality(self) -> t.Optional[exp.Expression]: 3891 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 3892 3893 def _parse_comparison(self) -> t.Optional[exp.Expression]: 3894 return self._parse_tokens(self._parse_range, self.COMPARISON) 3895 3896 def _parse_range(self, this: t.Optional[exp.Expression] = None) 
-> t.Optional[exp.Expression]: 3897 this = this or self._parse_bitwise() 3898 negate = self._match(TokenType.NOT) 3899 3900 if self._match_set(self.RANGE_PARSERS): 3901 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 3902 if not expression: 3903 return this 3904 3905 this = expression 3906 elif self._match(TokenType.ISNULL): 3907 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3908 3909 # Postgres supports ISNULL and NOTNULL for conditions. 3910 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 3911 if self._match(TokenType.NOTNULL): 3912 this = self.expression(exp.Is, this=this, expression=exp.Null()) 3913 this = self.expression(exp.Not, this=this) 3914 3915 if negate: 3916 this = self.expression(exp.Not, this=this) 3917 3918 if self._match(TokenType.IS): 3919 this = self._parse_is(this) 3920 3921 return this 3922 3923 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3924 index = self._index - 1 3925 negate = self._match(TokenType.NOT) 3926 3927 if self._match_text_seq("DISTINCT", "FROM"): 3928 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 3929 return self.expression(klass, this=this, expression=self._parse_bitwise()) 3930 3931 expression = self._parse_null() or self._parse_boolean() 3932 if not expression: 3933 self._retreat(index) 3934 return None 3935 3936 this = self.expression(exp.Is, this=this, expression=expression) 3937 return self.expression(exp.Not, this=this) if negate else this 3938 3939 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 3940 unnest = self._parse_unnest(with_alias=False) 3941 if unnest: 3942 this = self.expression(exp.In, this=this, unnest=unnest) 3943 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 3944 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 3945 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 3946 3947 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 3948 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 3949 else: 3950 this = self.expression(exp.In, this=this, expressions=expressions) 3951 3952 if matched_l_paren: 3953 self._match_r_paren(this) 3954 elif not self._match(TokenType.R_BRACKET, expression=this): 3955 self.raise_error("Expecting ]") 3956 else: 3957 this = self.expression(exp.In, this=this, field=self._parse_field()) 3958 3959 return this 3960 3961 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 3962 low = self._parse_bitwise() 3963 self._match(TokenType.AND) 3964 high = self._parse_bitwise() 3965 return self.expression(exp.Between, this=this, low=low, high=high) 3966 3967 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3968 if not self._match(TokenType.ESCAPE): 3969 return this 3970 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 3971 3972 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Interval]: 3973 index = self._index 3974 3975 if not self._match(TokenType.INTERVAL) and match_interval: 3976 return None 3977 3978 if self._match(TokenType.STRING, advance=False): 3979 this = self._parse_primary() 3980 else: 3981 this = self._parse_term() 3982 3983 if not this or ( 3984 isinstance(this, exp.Column) 3985 and not this.table 3986 and not this.this.quoted 3987 and this.name.upper() == "IS" 3988 ): 3989 self._retreat(index) 3990 return None 3991 3992 unit = 
self._parse_function() or ( 3993 not self._match(TokenType.ALIAS, advance=False) 3994 and self._parse_var(any_token=True, upper=True) 3995 ) 3996 3997 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 3998 # each INTERVAL expression into this canonical form so it's easy to transpile 3999 if this and this.is_number: 4000 this = exp.Literal.string(this.name) 4001 elif this and this.is_string: 4002 parts = this.name.split() 4003 4004 if len(parts) == 2: 4005 if unit: 4006 # This is not actually a unit, it's something else (e.g. a "window side") 4007 unit = None 4008 self._retreat(self._index - 1) 4009 4010 this = exp.Literal.string(parts[0]) 4011 unit = self.expression(exp.Var, this=parts[1].upper()) 4012 4013 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4014 unit = self.expression( 4015 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4016 ) 4017 4018 return self.expression(exp.Interval, this=this, unit=unit) 4019 4020 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4021 this = self._parse_term() 4022 4023 while True: 4024 if self._match_set(self.BITWISE): 4025 this = self.expression( 4026 self.BITWISE[self._prev.token_type], 4027 this=this, 4028 expression=self._parse_term(), 4029 ) 4030 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4031 this = self.expression( 4032 exp.DPipe, 4033 this=this, 4034 expression=self._parse_term(), 4035 safe=not self.dialect.STRICT_STRING_CONCAT, 4036 ) 4037 elif self._match(TokenType.DQMARK): 4038 this = self.expression(exp.Coalesce, this=this, expressions=self._parse_term()) 4039 elif self._match_pair(TokenType.LT, TokenType.LT): 4040 this = self.expression( 4041 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4042 ) 4043 elif self._match_pair(TokenType.GT, TokenType.GT): 4044 this = self.expression( 4045 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4046 ) 4047 else: 4048 break 4049 4050 return this 4051 4052 def _parse_term(self) -> t.Optional[exp.Expression]: 4053 return self._parse_tokens(self._parse_factor, self.TERM) 4054 4055 def _parse_factor(self) -> t.Optional[exp.Expression]: 4056 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 4057 this = parse_method() 4058 4059 while self._match_set(self.FACTOR): 4060 this = self.expression( 4061 self.FACTOR[self._prev.token_type], 4062 this=this, 4063 comments=self._prev_comments, 4064 expression=parse_method(), 4065 ) 4066 if isinstance(this, exp.Div): 4067 this.args["typed"] = self.dialect.TYPED_DIVISION 4068 this.args["safe"] = self.dialect.SAFE_DIVISION 4069 4070 return this 4071 4072 def _parse_exponent(self) -> t.Optional[exp.Expression]: 4073 return self._parse_tokens(self._parse_unary, self.EXPONENT) 4074 4075 def _parse_unary(self) -> t.Optional[exp.Expression]: 4076 if self._match_set(self.UNARY_PARSERS): 4077 return self.UNARY_PARSERS[self._prev.token_type](self) 4078 return self._parse_at_time_zone(self._parse_type()) 4079 4080 def _parse_type(self, parse_interval: bool = True) -> t.Optional[exp.Expression]: 4081 interval = parse_interval and self._parse_interval() 4082 if interval: 4083 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 4084 while True: 4085 index = self._index 4086 self._match(TokenType.PLUS) 4087 4088 if not self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4089 self._retreat(index) 4090 break 4091 4092 interval = self.expression( # type: ignore 4093 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4094 ) 4095 4096 return interval 4097 4098 index = self._index 4099 data_type = self._parse_types(check_func=True, allow_identifiers=False) 4100 this = self._parse_column() 4101 4102 if data_type: 4103 if isinstance(this, exp.Literal): 4104 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 4105 if parser: 4106 return parser(self, this, data_type) 4107 return self.expression(exp.Cast, this=this, to=data_type) 4108 if not data_type.expressions: 4109 self._retreat(index) 4110 return self._parse_column() 4111 return self._parse_column_ops(data_type) 4112 4113 return this and self._parse_column_ops(this) 4114 4115 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4116 this = self._parse_type() 4117 if not this: 4118 return None 4119 4120 if isinstance(this, exp.Column) and not this.table: 4121 this = exp.var(this.name.upper()) 4122 4123 return self.expression( 4124 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4125 ) 4126 4127 def _parse_types( 4128 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4129 ) -> t.Optional[exp.Expression]: 4130 index = self._index 4131 4132 this: t.Optional[exp.Expression] = None 4133 prefix = self._match_text_seq("SYSUDTLIB", ".") 4134 4135 if not self._match_set(self.TYPE_TOKENS): 4136 identifier = allow_identifiers and self._parse_id_var( 4137 any_token=False, tokens=(TokenType.VAR,) 4138 ) 4139 if identifier: 4140 tokens = self.dialect.tokenize(identifier.name) 4141 4142 if len(tokens) != 1: 4143 self.raise_error("Unexpected identifier", self._prev) 4144 4145 if tokens[0].token_type in self.TYPE_TOKENS: 4146 self._prev = tokens[0] 4147 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4148 type_name = identifier.name 4149 4150 while self._match(TokenType.DOT): 4151 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4152 4153 this = exp.DataType.build(type_name, udt=True) 4154 else: 4155 self._retreat(self._index - 1) 4156 return None 4157 else: 4158 return None 4159 4160 type_token = self._prev.token_type 4161 4162 if type_token == TokenType.PSEUDO_TYPE: 4163 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4164 4165 if type_token == TokenType.OBJECT_IDENTIFIER: 4166 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4167 4168 nested = type_token in self.NESTED_TYPE_TOKENS 4169 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4170 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4171 expressions = None 4172 maybe_func = False 4173 4174 if self._match(TokenType.L_PAREN): 4175 if is_struct: 4176 expressions = self._parse_csv(self._parse_struct_types) 4177 elif nested: 4178 expressions = self._parse_csv( 4179 lambda: self._parse_types( 4180 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4181 ) 4182 ) 4183 elif type_token in self.ENUM_TYPE_TOKENS: 4184 expressions = self._parse_csv(self._parse_equality) 4185 elif is_aggregate: 4186 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4187 any_token=False, tokens=(TokenType.VAR,) 4188 ) 4189 if not func_or_ident or not self._match(TokenType.COMMA): 4190 
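# Illustrative note: ClickHouse-style aggregate types, e.g.
# AggregateFunction(avg, Float64), expect a function name followed by its
# argument types, so we bail out below when that shape is absent.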
return None 4191 expressions = self._parse_csv( 4192 lambda: self._parse_types( 4193 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4194 ) 4195 ) 4196 expressions.insert(0, func_or_ident) 4197 else: 4198 expressions = self._parse_csv(self._parse_type_size) 4199 4200 if not expressions or not self._match(TokenType.R_PAREN): 4201 self._retreat(index) 4202 return None 4203 4204 maybe_func = True 4205 4206 values: t.Optional[t.List[exp.Expression]] = None 4207 4208 if nested and self._match(TokenType.LT): 4209 if is_struct: 4210 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4211 else: 4212 expressions = self._parse_csv( 4213 lambda: self._parse_types( 4214 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4215 ) 4216 ) 4217 4218 if not self._match(TokenType.GT): 4219 self.raise_error("Expecting >") 4220 4221 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4222 values = self._parse_csv(self._parse_conjunction) 4223 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4224 4225 if type_token in self.TIMESTAMPS: 4226 if self._match_text_seq("WITH", "TIME", "ZONE"): 4227 maybe_func = False 4228 tz_type = ( 4229 exp.DataType.Type.TIMETZ 4230 if type_token in self.TIMES 4231 else exp.DataType.Type.TIMESTAMPTZ 4232 ) 4233 this = exp.DataType(this=tz_type, expressions=expressions) 4234 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4235 maybe_func = False 4236 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4237 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4238 maybe_func = False 4239 elif type_token == TokenType.INTERVAL: 4240 unit = self._parse_var(upper=True) 4241 if unit: 4242 if self._match_text_seq("TO"): 4243 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4244 4245 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4246 else: 4247 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4248 4249 if maybe_func and check_func: 4250 index2 = self._index 4251 peek = self._parse_string() 4252 4253 if not peek: 4254 self._retreat(index) 4255 return None 4256 4257 self._retreat(index2) 4258 4259 if not this: 4260 if self._match_text_seq("UNSIGNED"): 4261 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4262 if not unsigned_type_token: 4263 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4264 4265 type_token = unsigned_type_token or type_token 4266 4267 this = exp.DataType( 4268 this=exp.DataType.Type[type_token.value], 4269 expressions=expressions, 4270 nested=nested, 4271 values=values, 4272 prefix=prefix, 4273 ) 4274 elif expressions: 4275 this.set("expressions", expressions) 4276 4277 while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET): 4278 this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True) 4279 4280 if self.TYPE_CONVERTER and isinstance(this.this, exp.DataType.Type): 4281 converter = self.TYPE_CONVERTER.get(this.this) 4282 if converter: 4283 this = converter(t.cast(exp.DataType, this)) 4284 4285 return this 4286 4287 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4288 index = self._index 4289 this = self._parse_type(parse_interval=False) or self._parse_id_var() 4290 self._match(TokenType.COLON) 4291 column_def = self._parse_column_def(this) 4292 4293 if type_required and ( 4294 (isinstance(this, exp.Column) and this.this is 
column_def) or this is column_def 4295 ): 4296 self._retreat(index) 4297 return self._parse_types() 4298 4299 return column_def 4300 4301 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4302 if not self._match_text_seq("AT", "TIME", "ZONE"): 4303 return this 4304 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4305 4306 def _parse_column(self) -> t.Optional[exp.Expression]: 4307 this = self._parse_column_reference() 4308 return self._parse_column_ops(this) if this else self._parse_bracket(this) 4309 4310 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4311 this = self._parse_field() 4312 if ( 4313 not this 4314 and self._match(TokenType.VALUES, advance=False) 4315 and self.VALUES_FOLLOWED_BY_PAREN 4316 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4317 ): 4318 this = self._parse_id_var() 4319 4320 if isinstance(this, exp.Identifier): 4321 # We bubble up comments from the Identifier to the Column 4322 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4323 4324 return this 4325 4326 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4327 this = self._parse_bracket(this) 4328 4329 while self._match_set(self.COLUMN_OPERATORS): 4330 op_token = self._prev.token_type 4331 op = self.COLUMN_OPERATORS.get(op_token) 4332 4333 if op_token == TokenType.DCOLON: 4334 field = self._parse_types() 4335 if not field: 4336 self.raise_error("Expected type") 4337 elif op and self._curr: 4338 field = self._parse_column_reference() 4339 else: 4340 field = self._parse_field(any_token=True, anonymous_func=True) 4341 4342 if isinstance(field, exp.Func) and this: 4343 # bigquery allows function calls like x.y.count(...) 4344 # SAFE.SUBSTR(...) 
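# e.g. SELECT project.dataset.my_udf(x) (illustrative identifiers)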
4345 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 4346 this = exp.replace_tree( 4347 this, 4348 lambda n: ( 4349 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 4350 if n.table 4351 else n.this 4352 ) 4353 if isinstance(n, exp.Column) 4354 else n, 4355 ) 4356 4357 if op: 4358 this = op(self, this, field) 4359 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 4360 this = self.expression( 4361 exp.Column, 4362 this=field, 4363 table=this.this, 4364 db=this.args.get("table"), 4365 catalog=this.args.get("db"), 4366 ) 4367 else: 4368 this = self.expression(exp.Dot, this=this, expression=field) 4369 this = self._parse_bracket(this) 4370 return this 4371 4372 def _parse_primary(self) -> t.Optional[exp.Expression]: 4373 if self._match_set(self.PRIMARY_PARSERS): 4374 token_type = self._prev.token_type 4375 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 4376 4377 if token_type == TokenType.STRING: 4378 expressions = [primary] 4379 while self._match(TokenType.STRING): 4380 expressions.append(exp.Literal.string(self._prev.text)) 4381 4382 if len(expressions) > 1: 4383 return self.expression(exp.Concat, expressions=expressions) 4384 4385 return primary 4386 4387 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 4388 return exp.Literal.number(f"0.{self._prev.text}") 4389 4390 if self._match(TokenType.L_PAREN): 4391 comments = self._prev_comments 4392 query = self._parse_select() 4393 4394 if query: 4395 expressions = [query] 4396 else: 4397 expressions = self._parse_expressions() 4398 4399 this = self._parse_query_modifiers(seq_get(expressions, 0)) 4400 4401 if not this and self._match(TokenType.R_PAREN, advance=False): 4402 this = self.expression(exp.Tuple) 4403 elif isinstance(this, exp.UNWRAPPED_QUERIES): 4404 this = self._parse_subquery(this=this, parse_alias=False) 4405 elif isinstance(this, exp.Subquery): 4406 this = self._parse_subquery( 4407 this=self._parse_set_operations(this), parse_alias=False 4408 ) 4409 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 4410 this = self.expression(exp.Tuple, expressions=expressions) 4411 else: 4412 this = self.expression(exp.Paren, this=this) 4413 4414 if this: 4415 this.add_comments(comments) 4416 4417 self._match_r_paren(expression=this) 4418 return this 4419 4420 return None 4421 4422 def _parse_field( 4423 self, 4424 any_token: bool = False, 4425 tokens: t.Optional[t.Collection[TokenType]] = None, 4426 anonymous_func: bool = False, 4427 ) -> t.Optional[exp.Expression]: 4428 if anonymous_func: 4429 field = ( 4430 self._parse_function(anonymous=anonymous_func, any_token=any_token) 4431 or self._parse_primary() 4432 ) 4433 else: 4434 field = self._parse_primary() or self._parse_function( 4435 anonymous=anonymous_func, any_token=any_token 4436 ) 4437 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 4438 4439 def _parse_function( 4440 self, 4441 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4442 anonymous: bool = False, 4443 optional_parens: bool = True, 4444 any_token: bool = False, 4445 ) -> t.Optional[exp.Expression]: 4446 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 4447 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 4448 fn_syntax = False 4449 if ( 4450 self._match(TokenType.L_BRACE, advance=False) 4451 and self._next 4452 and self._next.text.upper() == "FN" 4453 ): 4454 self._advance(2) 4455 fn_syntax = True 4456 4457 func 
= self._parse_function_call( 4458 functions=functions, 4459 anonymous=anonymous, 4460 optional_parens=optional_parens, 4461 any_token=any_token, 4462 ) 4463 4464 if fn_syntax: 4465 self._match(TokenType.R_BRACE) 4466 4467 return func 4468 4469 def _parse_function_call( 4470 self, 4471 functions: t.Optional[t.Dict[str, t.Callable]] = None, 4472 anonymous: bool = False, 4473 optional_parens: bool = True, 4474 any_token: bool = False, 4475 ) -> t.Optional[exp.Expression]: 4476 if not self._curr: 4477 return None 4478 4479 comments = self._curr.comments 4480 token_type = self._curr.token_type 4481 this = self._curr.text 4482 upper = this.upper() 4483 4484 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 4485 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 4486 self._advance() 4487 return self._parse_window(parser(self)) 4488 4489 if not self._next or self._next.token_type != TokenType.L_PAREN: 4490 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 4491 self._advance() 4492 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 4493 4494 return None 4495 4496 if any_token: 4497 if token_type in self.RESERVED_TOKENS: 4498 return None 4499 elif token_type not in self.FUNC_TOKENS: 4500 return None 4501 4502 self._advance(2) 4503 4504 parser = self.FUNCTION_PARSERS.get(upper) 4505 if parser and not anonymous: 4506 this = parser(self) 4507 else: 4508 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 4509 4510 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 4511 this = self.expression(subquery_predicate, this=self._parse_select()) 4512 self._match_r_paren() 4513 return this 4514 4515 if functions is None: 4516 functions = self.FUNCTIONS 4517 4518 function = functions.get(upper) 4519 4520 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 4521 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 4522 4523 if alias: 4524 args = self._kv_to_prop_eq(args) 4525 4526 if function and not anonymous: 4527 if "dialect" in function.__code__.co_varnames: 4528 func = function(args, dialect=self.dialect) 4529 else: 4530 func = function(args) 4531 4532 func = self.validate_expression(func, args) 4533 if not self.dialect.NORMALIZE_FUNCTIONS: 4534 func.meta["name"] = this 4535 4536 this = func 4537 else: 4538 if token_type == TokenType.IDENTIFIER: 4539 this = exp.Identifier(this=this, quoted=True) 4540 this = self.expression(exp.Anonymous, this=this, expressions=args) 4541 4542 if isinstance(this, exp.Expression): 4543 this.add_comments(comments) 4544 4545 self._match_r_paren(this) 4546 return self._parse_window(this) 4547 4548 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 4549 transformed = [] 4550 4551 for e in expressions: 4552 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 4553 if isinstance(e, exp.Alias): 4554 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 4555 4556 if not isinstance(e, exp.PropertyEQ): 4557 e = self.expression( 4558 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 4559 ) 4560 4561 if isinstance(e.this, exp.Column): 4562 e.this.replace(e.this.this) 4563 4564 transformed.append(e) 4565 4566 return transformed 4567 4568 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 4569 return self._parse_column_def(self._parse_id_var()) 4570 4571 def _parse_user_defined_function( 4572 self, kind: t.Optional[TokenType] = None 4573 ) -> t.Optional[exp.Expression]: 4574 this = 
self._parse_id_var() 4575 4576 while self._match(TokenType.DOT): 4577 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 4578 4579 if not self._match(TokenType.L_PAREN): 4580 return this 4581 4582 expressions = self._parse_csv(self._parse_function_parameter) 4583 self._match_r_paren() 4584 return self.expression( 4585 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 4586 ) 4587 4588 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 4589 literal = self._parse_primary() 4590 if literal: 4591 return self.expression(exp.Introducer, this=token.text, expression=literal) 4592 4593 return self.expression(exp.Identifier, this=token.text) 4594 4595 def _parse_session_parameter(self) -> exp.SessionParameter: 4596 kind = None 4597 this = self._parse_id_var() or self._parse_primary() 4598 4599 if this and self._match(TokenType.DOT): 4600 kind = this.name 4601 this = self._parse_var() or self._parse_primary() 4602 4603 return self.expression(exp.SessionParameter, this=this, kind=kind) 4604 4605 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 4606 index = self._index 4607 4608 if self._match(TokenType.L_PAREN): 4609 expressions = t.cast( 4610 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_id_var) 4611 ) 4612 4613 if not self._match(TokenType.R_PAREN): 4614 self._retreat(index) 4615 else: 4616 expressions = [self._parse_id_var()] 4617 4618 if self._match_set(self.LAMBDAS): 4619 return self.LAMBDAS[self._prev.token_type](self, expressions) 4620 4621 self._retreat(index) 4622 4623 this: t.Optional[exp.Expression] 4624 4625 if self._match(TokenType.DISTINCT): 4626 this = self.expression( 4627 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 4628 ) 4629 else: 4630 this = self._parse_select_or_expression(alias=alias) 4631 4632 return self._parse_limit( 4633 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 4634 ) 4635 4636 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4637 index = self._index 4638 if not self._match(TokenType.L_PAREN): 4639 return this 4640 4641 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 4642 # expr can be of both types 4643 if self._match_set(self.SELECT_START_TOKENS): 4644 self._retreat(index) 4645 return this 4646 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 4647 self._match_r_paren() 4648 return self.expression(exp.Schema, this=this, expressions=args) 4649 4650 def _parse_field_def(self) -> t.Optional[exp.Expression]: 4651 return self._parse_column_def(self._parse_field(any_token=True)) 4652 4653 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4654 # column defs are not really columns, they're identifiers 4655 if isinstance(this, exp.Column): 4656 this = this.this 4657 4658 kind = self._parse_types(schema=True) 4659 4660 if self._match_text_seq("FOR", "ORDINALITY"): 4661 return self.expression(exp.ColumnDef, this=this, ordinality=True) 4662 4663 constraints: t.List[exp.Expression] = [] 4664 4665 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 4666 ("ALIAS", "MATERIALIZED") 4667 ): 4668 persisted = self._prev.text.upper() == "MATERIALIZED" 4669 constraints.append( 4670 self.expression( 4671 exp.ComputedColumnConstraint, 4672 this=self._parse_conjunction(), 4673 persisted=persisted or self._match_text_seq("PERSISTED"), 4674 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 4675 ) 4676 ) 4677 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 4678 self._match(TokenType.ALIAS) 4679 constraints.append( 4680 self.expression(exp.TransformColumnConstraint, this=self._parse_field()) 4681 ) 4682 4683 while True: 4684 constraint = self._parse_column_constraint() 4685 if not constraint: 4686 break 4687 constraints.append(constraint) 4688 4689 if not kind and not constraints: 4690 return this 4691 4692 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 4693 4694 def _parse_auto_increment( 4695 self, 4696 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 4697 start = None 4698 increment = None 4699 4700 if self._match(TokenType.L_PAREN, advance=False): 4701 args = self._parse_wrapped_csv(self._parse_bitwise) 4702 start = seq_get(args, 0) 4703 increment = seq_get(args, 1) 4704 elif self._match_text_seq("START"): 4705 start = self._parse_bitwise() 4706 self._match_text_seq("INCREMENT") 4707 increment = self._parse_bitwise() 4708 4709 if start and increment: 4710 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 4711 4712 return exp.AutoIncrementColumnConstraint() 4713 4714 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 4715 if not self._match_text_seq("REFRESH"): 4716 self._retreat(self._index - 1) 4717 return None 4718 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 4719 4720 def _parse_compress(self) -> exp.CompressColumnConstraint: 4721 if self._match(TokenType.L_PAREN, advance=False): 4722 return self.expression( 4723 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 4724 ) 4725 4726 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 4727 4728 def _parse_generated_as_identity( 4729 self, 4730 ) -> ( 4731 exp.GeneratedAsIdentityColumnConstraint 4732 | exp.ComputedColumnConstraint 4733 | exp.GeneratedAsRowColumnConstraint 4734 ): 4735 if self._match_text_seq("BY", "DEFAULT"): 4736 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 4737 this = self.expression( 4738 
exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 4739 ) 4740 else: 4741 self._match_text_seq("ALWAYS") 4742 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 4743 4744 self._match(TokenType.ALIAS) 4745 4746 if self._match_text_seq("ROW"): 4747 start = self._match_text_seq("START") 4748 if not start: 4749 self._match(TokenType.END) 4750 hidden = self._match_text_seq("HIDDEN") 4751 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 4752 4753 identity = self._match_text_seq("IDENTITY") 4754 4755 if self._match(TokenType.L_PAREN): 4756 if self._match(TokenType.START_WITH): 4757 this.set("start", self._parse_bitwise()) 4758 if self._match_text_seq("INCREMENT", "BY"): 4759 this.set("increment", self._parse_bitwise()) 4760 if self._match_text_seq("MINVALUE"): 4761 this.set("minvalue", self._parse_bitwise()) 4762 if self._match_text_seq("MAXVALUE"): 4763 this.set("maxvalue", self._parse_bitwise()) 4764 4765 if self._match_text_seq("CYCLE"): 4766 this.set("cycle", True) 4767 elif self._match_text_seq("NO", "CYCLE"): 4768 this.set("cycle", False) 4769 4770 if not identity: 4771 this.set("expression", self._parse_range()) 4772 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 4773 args = self._parse_csv(self._parse_bitwise) 4774 this.set("start", seq_get(args, 0)) 4775 this.set("increment", seq_get(args, 1)) 4776 4777 self._match_r_paren() 4778 4779 return this 4780 4781 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 4782 self._match_text_seq("LENGTH") 4783 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 4784 4785 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 4786 if self._match_text_seq("NULL"): 4787 return self.expression(exp.NotNullColumnConstraint) 4788 if self._match_text_seq("CASESPECIFIC"): 4789 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 4790 if self._match_text_seq("FOR", "REPLICATION"): 4791 return self.expression(exp.NotForReplicationColumnConstraint) 4792 return None 4793 4794 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 4795 if self._match(TokenType.CONSTRAINT): 4796 this = self._parse_id_var() 4797 else: 4798 this = None 4799 4800 if self._match_texts(self.CONSTRAINT_PARSERS): 4801 return self.expression( 4802 exp.ColumnConstraint, 4803 this=this, 4804 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 4805 ) 4806 4807 return this 4808 4809 def _parse_constraint(self) -> t.Optional[exp.Expression]: 4810 if not self._match(TokenType.CONSTRAINT): 4811 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 4812 4813 return self.expression( 4814 exp.Constraint, 4815 this=self._parse_id_var(), 4816 expressions=self._parse_unnamed_constraints(), 4817 ) 4818 4819 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 4820 constraints = [] 4821 while True: 4822 constraint = self._parse_unnamed_constraint() or self._parse_function() 4823 if not constraint: 4824 break 4825 constraints.append(constraint) 4826 4827 return constraints 4828 4829 def _parse_unnamed_constraint( 4830 self, constraints: t.Optional[t.Collection[str]] = None 4831 ) -> t.Optional[exp.Expression]: 4832 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 4833 constraints or self.CONSTRAINT_PARSERS 4834 ): 4835 return None 4836 4837 constraint = self._prev.text.upper() 4838 if constraint not in self.CONSTRAINT_PARSERS: 4839 
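# A keyword may appear in SCHEMA_UNNAMED_CONSTRAINTS without a matching
# entry in CONSTRAINT_PARSERS; surface that mismatch as a parse error.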
self.raise_error(f"No parser found for schema constraint {constraint}.") 4840 4841 return self.CONSTRAINT_PARSERS[constraint](self) 4842 4843 def _parse_unique(self) -> exp.UniqueColumnConstraint: 4844 self._match_text_seq("KEY") 4845 return self.expression( 4846 exp.UniqueColumnConstraint, 4847 this=self._parse_schema(self._parse_id_var(any_token=False)), 4848 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 4849 on_conflict=self._parse_on_conflict(), 4850 ) 4851 4852 def _parse_key_constraint_options(self) -> t.List[str]: 4853 options = [] 4854 while True: 4855 if not self._curr: 4856 break 4857 4858 if self._match(TokenType.ON): 4859 action = None 4860 on = self._advance_any() and self._prev.text 4861 4862 if self._match_text_seq("NO", "ACTION"): 4863 action = "NO ACTION" 4864 elif self._match_text_seq("CASCADE"): 4865 action = "CASCADE" 4866 elif self._match_text_seq("RESTRICT"): 4867 action = "RESTRICT" 4868 elif self._match_pair(TokenType.SET, TokenType.NULL): 4869 action = "SET NULL" 4870 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 4871 action = "SET DEFAULT" 4872 else: 4873 self.raise_error("Invalid key constraint") 4874 4875 options.append(f"ON {on} {action}") 4876 elif self._match_text_seq("NOT", "ENFORCED"): 4877 options.append("NOT ENFORCED") 4878 elif self._match_text_seq("DEFERRABLE"): 4879 options.append("DEFERRABLE") 4880 elif self._match_text_seq("INITIALLY", "DEFERRED"): 4881 options.append("INITIALLY DEFERRED") 4882 elif self._match_text_seq("NORELY"): 4883 options.append("NORELY") 4884 elif self._match_text_seq("MATCH", "FULL"): 4885 options.append("MATCH FULL") 4886 else: 4887 break 4888 4889 return options 4890 4891 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 4892 if match and not self._match(TokenType.REFERENCES): 4893 return None 4894 4895 expressions = None 4896 this = self._parse_table(schema=True) 4897 options = self._parse_key_constraint_options() 4898 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 4899 4900 def _parse_foreign_key(self) -> exp.ForeignKey: 4901 expressions = self._parse_wrapped_id_vars() 4902 reference = self._parse_references() 4903 options = {} 4904 4905 while self._match(TokenType.ON): 4906 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 4907 self.raise_error("Expected DELETE or UPDATE") 4908 4909 kind = self._prev.text.lower() 4910 4911 if self._match_text_seq("NO", "ACTION"): 4912 action = "NO ACTION" 4913 elif self._match(TokenType.SET): 4914 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 4915 action = "SET " + self._prev.text.upper() 4916 else: 4917 self._advance() 4918 action = self._prev.text.upper() 4919 4920 options[kind] = action 4921 4922 return self.expression( 4923 exp.ForeignKey, 4924 expressions=expressions, 4925 reference=reference, 4926 **options, # type: ignore 4927 ) 4928 4929 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 4930 return self._parse_field() 4931 4932 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 4933 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 4934 self._retreat(self._index - 1) 4935 return None 4936 4937 id_vars = self._parse_wrapped_id_vars() 4938 return self.expression( 4939 exp.PeriodForSystemTimeConstraint, 4940 this=seq_get(id_vars, 0), 4941 expression=seq_get(id_vars, 1), 4942 ) 4943 4944 def _parse_primary_key( 4945 self, wrapped_optional: bool = False, in_props: bool = False 4946 ) -> 
exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 4947 desc = ( 4948 self._match_set((TokenType.ASC, TokenType.DESC)) 4949 and self._prev.token_type == TokenType.DESC 4950 ) 4951 4952 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 4953 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 4954 4955 expressions = self._parse_wrapped_csv( 4956 self._parse_primary_key_part, optional=wrapped_optional 4957 ) 4958 options = self._parse_key_constraint_options() 4959 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 4960 4961 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 4962 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 4963 4964 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4965 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 4966 return this 4967 4968 bracket_kind = self._prev.token_type 4969 expressions = self._parse_csv( 4970 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 4971 ) 4972 4973 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 4974 self.raise_error("Expected ]") 4975 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 4976 self.raise_error("Expected }") 4977 4978 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 4979 if bracket_kind == TokenType.L_BRACE: 4980 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 4981 elif not this or this.name.upper() == "ARRAY": 4982 this = self.expression(exp.Array, expressions=expressions) 4983 else: 4984 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 4985 this = self.expression(exp.Bracket, this=this, expressions=expressions) 4986 4987 self._add_comments(this) 4988 return self._parse_bracket(this) 4989 4990 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4991 if self._match(TokenType.COLON): 4992 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 4993 return this 4994 4995 def _parse_case(self) -> t.Optional[exp.Expression]: 4996 ifs = [] 4997 default = None 4998 4999 comments = self._prev_comments 5000 expression = self._parse_conjunction() 5001 5002 while self._match(TokenType.WHEN): 5003 this = self._parse_conjunction() 5004 self._match(TokenType.THEN) 5005 then = self._parse_conjunction() 5006 ifs.append(self.expression(exp.If, this=this, true=then)) 5007 5008 if self._match(TokenType.ELSE): 5009 default = self._parse_conjunction() 5010 5011 if not self._match(TokenType.END): 5012 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5013 default = exp.column("interval") 5014 else: 5015 self.raise_error("Expected END after CASE", self._prev) 5016 5017 return self.expression( 5018 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5019 ) 5020 5021 def _parse_if(self) -> t.Optional[exp.Expression]: 5022 if self._match(TokenType.L_PAREN): 5023 args = self._parse_csv(self._parse_conjunction) 5024 this = self.validate_expression(exp.If.from_arg_list(args), args) 5025 self._match_r_paren() 5026 else: 5027 index = self._index - 1 5028 5029 if self.NO_PAREN_IF_COMMANDS and index == 0: 5030 return self._parse_as_command(self._prev) 5031 5032 condition = self._parse_conjunction() 5033 5034 if not condition: 5035 self._retreat(index) 5036 return None 5037 
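# No-parentheses form, e.g. IF cond THEN expr [ELSE expr] END
# (illustrative shape of the statement consumed below).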
5038 self._match(TokenType.THEN) 5039 true = self._parse_conjunction() 5040 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 5041 self._match(TokenType.END) 5042 this = self.expression(exp.If, this=condition, true=true, false=false) 5043 5044 return this 5045 5046 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5047 if not self._match_text_seq("VALUE", "FOR"): 5048 self._retreat(self._index - 1) 5049 return None 5050 5051 return self.expression( 5052 exp.NextValueFor, 5053 this=self._parse_column(), 5054 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5055 ) 5056 5057 def _parse_extract(self) -> exp.Extract: 5058 this = self._parse_function() or self._parse_var() or self._parse_type() 5059 5060 if self._match(TokenType.FROM): 5061 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5062 5063 if not self._match(TokenType.COMMA): 5064 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5065 5066 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5067 5068 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5069 this = self._parse_conjunction() 5070 5071 if not self._match(TokenType.ALIAS): 5072 if self._match(TokenType.COMMA): 5073 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5074 5075 self.raise_error("Expected AS after CAST") 5076 5077 fmt = None 5078 to = self._parse_types() 5079 5080 if self._match(TokenType.FORMAT): 5081 fmt_string = self._parse_string() 5082 fmt = self._parse_at_time_zone(fmt_string) 5083 5084 if not to: 5085 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5086 if to.this in exp.DataType.TEMPORAL_TYPES: 5087 this = self.expression( 5088 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5089 this=this, 5090 format=exp.Literal.string( 5091 format_time( 5092 fmt_string.this if fmt_string else "", 5093 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5094 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5095 ) 5096 ), 5097 ) 5098 5099 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5100 this.set("zone", fmt.args["zone"]) 5101 return this 5102 elif not to: 5103 self.raise_error("Expected TYPE after CAST") 5104 elif isinstance(to, exp.Identifier): 5105 to = exp.DataType.build(to.name, udt=True) 5106 elif to.this == exp.DataType.Type.CHAR: 5107 if self._match(TokenType.CHARACTER_SET): 5108 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5109 5110 return self.expression( 5111 exp.Cast if strict else exp.TryCast, 5112 this=this, 5113 to=to, 5114 format=fmt, 5115 safe=safe, 5116 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5117 ) 5118 5119 def _parse_string_agg(self) -> exp.Expression: 5120 if self._match(TokenType.DISTINCT): 5121 args: t.List[t.Optional[exp.Expression]] = [ 5122 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 5123 ] 5124 if self._match(TokenType.COMMA): 5125 args.extend(self._parse_csv(self._parse_conjunction)) 5126 else: 5127 args = self._parse_csv(self._parse_conjunction) # type: ignore 5128 5129 index = self._index 5130 if not self._match(TokenType.R_PAREN) and args: 5131 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5132 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... 
]] [LIMIT n]) 5133 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5134 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5135 5136 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5137 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5138 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5139 if not self._match_text_seq("WITHIN", "GROUP"): 5140 self._retreat(index) 5141 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5142 5143 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5144 order = self._parse_order(this=seq_get(args, 0)) 5145 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5146 5147 def _parse_convert( 5148 self, strict: bool, safe: t.Optional[bool] = None 5149 ) -> t.Optional[exp.Expression]: 5150 this = self._parse_bitwise() 5151 5152 if self._match(TokenType.USING): 5153 to: t.Optional[exp.Expression] = self.expression( 5154 exp.CharacterSet, this=self._parse_var() 5155 ) 5156 elif self._match(TokenType.COMMA): 5157 to = self._parse_types() 5158 else: 5159 to = None 5160 5161 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5162 5163 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5164 """ 5165 There are generally two variants of the DECODE function: 5166 5167 - DECODE(bin, charset) 5168 - DECODE(expression, search, result [, search, result] ... [, default]) 5169 5170 The second variant will always be parsed into a CASE expression. Note that NULL 5171 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5172 instead of relying on pattern matching. 
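Example (illustrative): DECODE(x, 1, 'one', NULL, 'none', 'other') is
parsed as CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none'
ELSE 'other' END.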
5173 """ 5174 args = self._parse_csv(self._parse_conjunction) 5175 5176 if len(args) < 3: 5177 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5178 5179 expression, *expressions = args 5180 if not expression: 5181 return None 5182 5183 ifs = [] 5184 for search, result in zip(expressions[::2], expressions[1::2]): 5185 if not search or not result: 5186 return None 5187 5188 if isinstance(search, exp.Literal): 5189 ifs.append( 5190 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5191 ) 5192 elif isinstance(search, exp.Null): 5193 ifs.append( 5194 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5195 ) 5196 else: 5197 cond = exp.or_( 5198 exp.EQ(this=expression.copy(), expression=search), 5199 exp.and_( 5200 exp.Is(this=expression.copy(), expression=exp.Null()), 5201 exp.Is(this=search.copy(), expression=exp.Null()), 5202 copy=False, 5203 ), 5204 copy=False, 5205 ) 5206 ifs.append(exp.If(this=cond, true=result)) 5207 5208 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5209 5210 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5211 self._match_text_seq("KEY") 5212 key = self._parse_column() 5213 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5214 self._match_text_seq("VALUE") 5215 value = self._parse_bitwise() 5216 5217 if not key and not value: 5218 return None 5219 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5220 5221 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5222 if not this or not self._match_text_seq("FORMAT", "JSON"): 5223 return this 5224 5225 return self.expression(exp.FormatJson, this=this) 5226 5227 def _parse_on_handling(self, on: str, *values: str) -> t.Optional[str]: 5228 # Parses the "X ON Y" syntax, i.e. NULL ON NULL (Oracle, T-SQL) 5229 for value in values: 5230 if self._match_text_seq(value, "ON", on): 5231 return f"{value} ON {on}" 5232 5233 return None 5234 5235 @t.overload 5236 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 5237 5238 @t.overload 5239 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
5240 5241 def _parse_json_object(self, agg=False): 5242 star = self._parse_star() 5243 expressions = ( 5244 [star] 5245 if star 5246 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 5247 ) 5248 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 5249 5250 unique_keys = None 5251 if self._match_text_seq("WITH", "UNIQUE"): 5252 unique_keys = True 5253 elif self._match_text_seq("WITHOUT", "UNIQUE"): 5254 unique_keys = False 5255 5256 self._match_text_seq("KEYS") 5257 5258 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 5259 self._parse_type() 5260 ) 5261 encoding = self._match_text_seq("ENCODING") and self._parse_var() 5262 5263 return self.expression( 5264 exp.JSONObjectAgg if agg else exp.JSONObject, 5265 expressions=expressions, 5266 null_handling=null_handling, 5267 unique_keys=unique_keys, 5268 return_type=return_type, 5269 encoding=encoding, 5270 ) 5271 5272 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 5273 def _parse_json_column_def(self) -> exp.JSONColumnDef: 5274 if not self._match_text_seq("NESTED"): 5275 this = self._parse_id_var() 5276 kind = self._parse_types(allow_identifiers=False) 5277 nested = None 5278 else: 5279 this = None 5280 kind = None 5281 nested = True 5282 5283 path = self._match_text_seq("PATH") and self._parse_string() 5284 nested_schema = nested and self._parse_json_schema() 5285 5286 return self.expression( 5287 exp.JSONColumnDef, 5288 this=this, 5289 kind=kind, 5290 path=path, 5291 nested_schema=nested_schema, 5292 ) 5293 5294 def _parse_json_schema(self) -> exp.JSONSchema: 5295 self._match_text_seq("COLUMNS") 5296 return self.expression( 5297 exp.JSONSchema, 5298 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 5299 ) 5300 5301 def _parse_json_table(self) -> exp.JSONTable: 5302 this = self._parse_format_json(self._parse_bitwise()) 5303 path = self._match(TokenType.COMMA) and self._parse_string() 5304 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 5305 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 5306 schema = self._parse_json_schema() 5307 5308 return exp.JSONTable( 5309 this=this, 5310 schema=schema, 5311 path=path, 5312 error_handling=error_handling, 5313 empty_handling=empty_handling, 5314 ) 5315 5316 def _parse_match_against(self) -> exp.MatchAgainst: 5317 expressions = self._parse_csv(self._parse_column) 5318 5319 self._match_text_seq(")", "AGAINST", "(") 5320 5321 this = self._parse_string() 5322 5323 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 5324 modifier = "IN NATURAL LANGUAGE MODE" 5325 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5326 modifier = f"{modifier} WITH QUERY EXPANSION" 5327 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 5328 modifier = "IN BOOLEAN MODE" 5329 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 5330 modifier = "WITH QUERY EXPANSION" 5331 else: 5332 modifier = None 5333 5334 return self.expression( 5335 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 5336 ) 5337 5338 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 5339 def _parse_open_json(self) -> exp.OpenJSON: 5340 this = self._parse_bitwise() 5341 path = self._match(TokenType.COMMA) and self._parse_string() 5342 5343 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 5344 this = self._parse_field(any_token=True) 5345 kind = self._parse_types() 5346 path = 
self._parse_string() 5347 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 5348 5349 return self.expression( 5350 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 5351 ) 5352 5353 expressions = None 5354 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 5355 self._match_l_paren() 5356 expressions = self._parse_csv(_parse_open_json_column_def) 5357 5358 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 5359 5360 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 5361 args = self._parse_csv(self._parse_bitwise) 5362 5363 if self._match(TokenType.IN): 5364 return self.expression( 5365 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 5366 ) 5367 5368 if haystack_first: 5369 haystack = seq_get(args, 0) 5370 needle = seq_get(args, 1) 5371 else: 5372 needle = seq_get(args, 0) 5373 haystack = seq_get(args, 1) 5374 5375 return self.expression( 5376 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 5377 ) 5378 5379 def _parse_predict(self) -> exp.Predict: 5380 self._match_text_seq("MODEL") 5381 this = self._parse_table() 5382 5383 self._match(TokenType.COMMA) 5384 self._match_text_seq("TABLE") 5385 5386 return self.expression( 5387 exp.Predict, 5388 this=this, 5389 expression=self._parse_table(), 5390 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 5391 ) 5392 5393 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 5394 args = self._parse_csv(self._parse_table) 5395 return exp.JoinHint(this=func_name.upper(), expressions=args) 5396 5397 def _parse_substring(self) -> exp.Substring: 5398 # Postgres supports the form: substring(string [from int] [for int]) 5399 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 5400 5401 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 5402 5403 if self._match(TokenType.FROM): 5404 args.append(self._parse_bitwise()) 5405 if self._match(TokenType.FOR): 5406 if len(args) == 1: 5407 args.append(exp.Literal.number(1)) 5408 args.append(self._parse_bitwise()) 5409 5410 return self.validate_expression(exp.Substring.from_arg_list(args), args) 5411 5412 def _parse_trim(self) -> exp.Trim: 5413 # https://www.w3resource.com/sql/character-functions/trim.php 5414 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 5415 5416 position = None 5417 collation = None 5418 expression = None 5419 5420 if self._match_texts(self.TRIM_TYPES): 5421 position = self._prev.text.upper() 5422 5423 this = self._parse_bitwise() 5424 if self._match_set((TokenType.FROM, TokenType.COMMA)): 5425 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 5426 expression = self._parse_bitwise() 5427 5428 if invert_order: 5429 this, expression = expression, this 5430 5431 if self._match(TokenType.COLLATE): 5432 collation = self._parse_bitwise() 5433 5434 return self.expression( 5435 exp.Trim, this=this, position=position, expression=expression, collation=collation 5436 ) 5437 5438 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 5439 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 5440 5441 def _parse_named_window(self) -> t.Optional[exp.Expression]: 5442 return self._parse_window(self._parse_id_var(), alias=True) 5443 5444 def _parse_respect_or_ignore_nulls( 5445 self, this: t.Optional[exp.Expression] 5446 ) -> t.Optional[exp.Expression]: 5447 if self._match_text_seq("IGNORE", "NULLS"): 
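# e.g. FIRST_VALUE(x IGNORE NULLS) or FIRST_VALUE(x) IGNORE NULLS OVER (...)
# (illustrative calls that reach this branch).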
5448 return self.expression(exp.IgnoreNulls, this=this) 5449 if self._match_text_seq("RESPECT", "NULLS"): 5450 return self.expression(exp.RespectNulls, this=this) 5451 return this 5452 5453 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5454 if self._match(TokenType.HAVING): 5455 self._match_texts(("MAX", "MIN")) 5456 max = self._prev.text.upper() != "MIN" 5457 return self.expression( 5458 exp.HavingMax, this=this, expression=self._parse_column(), max=max 5459 ) 5460 5461 return this 5462 5463 def _parse_window( 5464 self, this: t.Optional[exp.Expression], alias: bool = False 5465 ) -> t.Optional[exp.Expression]: 5466 func = this 5467 comments = func.comments if isinstance(func, exp.Expression) else None 5468 5469 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 5470 self._match(TokenType.WHERE) 5471 this = self.expression( 5472 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 5473 ) 5474 self._match_r_paren() 5475 5476 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 5477 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 5478 if self._match_text_seq("WITHIN", "GROUP"): 5479 order = self._parse_wrapped(self._parse_order) 5480 this = self.expression(exp.WithinGroup, this=this, expression=order) 5481 5482 # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 5483 # clause; some dialects choose to implement it and some do not. 5484 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 5485 5486 # There is some code above in _parse_lambda that handles 5487 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 5488 5489 # The code below handles 5490 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 5491 5492 # Oracle allows both formats 5493 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 5494 # and Snowflake chose to do the same for familiarity 5495 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 5496 if isinstance(this, exp.AggFunc): 5497 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 5498 5499 if ignore_respect and ignore_respect is not this: 5500 ignore_respect.replace(ignore_respect.this) 5501 this = self.expression(ignore_respect.__class__, this=this) 5502 5503 this = self._parse_respect_or_ignore_nulls(this) 5504 5505 # bigquery select from window x AS (partition by ...) 
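# e.g. SELECT MAX(x) OVER w FROM t WINDOW w AS (PARTITION BY y): with
# alias=True the identifier before AS names the window and no OVER keyword
# is consumed (illustrative note).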
5506 if alias: 5507 over = None 5508 self._match(TokenType.ALIAS) 5509 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 5510 return this 5511 else: 5512 over = self._prev.text.upper() 5513 5514 if comments and isinstance(func, exp.Expression): 5515 func.pop_comments() 5516 5517 if not self._match(TokenType.L_PAREN): 5518 return self.expression( 5519 exp.Window, 5520 comments=comments, 5521 this=this, 5522 alias=self._parse_id_var(False), 5523 over=over, 5524 ) 5525 5526 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 5527 5528 first = self._match(TokenType.FIRST) 5529 if self._match_text_seq("LAST"): 5530 first = False 5531 5532 partition, order = self._parse_partition_and_order() 5533 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 5534 5535 if kind: 5536 self._match(TokenType.BETWEEN) 5537 start = self._parse_window_spec() 5538 self._match(TokenType.AND) 5539 end = self._parse_window_spec() 5540 5541 spec = self.expression( 5542 exp.WindowSpec, 5543 kind=kind, 5544 start=start["value"], 5545 start_side=start["side"], 5546 end=end["value"], 5547 end_side=end["side"], 5548 ) 5549 else: 5550 spec = None 5551 5552 self._match_r_paren() 5553 5554 window = self.expression( 5555 exp.Window, 5556 comments=comments, 5557 this=this, 5558 partition_by=partition, 5559 order=order, 5560 spec=spec, 5561 alias=window_alias, 5562 over=over, 5563 first=first, 5564 ) 5565 5566 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 5567 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 5568 return self._parse_window(window, alias=alias) 5569 5570 return window 5571 5572 def _parse_partition_and_order( 5573 self, 5574 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 5575 return self._parse_partition_by(), self._parse_order() 5576 5577 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 5578 self._match(TokenType.BETWEEN) 5579 5580 return { 5581 "value": ( 5582 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 5583 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 5584 or self._parse_bitwise() 5585 ), 5586 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 5587 } 5588 5589 def _parse_alias( 5590 self, this: t.Optional[exp.Expression], explicit: bool = False 5591 ) -> t.Optional[exp.Expression]: 5592 any_token = self._match(TokenType.ALIAS) 5593 comments = self._prev_comments or [] 5594 5595 if explicit and not any_token: 5596 return this 5597 5598 if self._match(TokenType.L_PAREN): 5599 aliases = self.expression( 5600 exp.Aliases, 5601 comments=comments, 5602 this=this, 5603 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 5604 ) 5605 self._match_r_paren(aliases) 5606 return aliases 5607 5608 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 5609 self.STRING_ALIASES and self._parse_string_as_identifier() 5610 ) 5611 5612 if alias: 5613 comments.extend(alias.pop_comments()) 5614 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 5615 column = this.this 5616 5617 # Moves the comment next to the alias in `expr /* comment */ AS alias` 5618 if not this.comments and column and column.comments: 5619 this.comments = column.pop_comments() 5620 5621 return this 5622 5623 def _parse_id_var( 5624 self, 5625 any_token: bool = True, 5626 tokens: t.Optional[t.Collection[TokenType]] = None, 5627 ) -> t.Optional[exp.Expression]: 5628 expression = self._parse_identifier() 5629 if 
not expression and ( 5630 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 5631 ): 5632 quoted = self._prev.token_type == TokenType.STRING 5633 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 5634 5635 return expression 5636 5637 def _parse_string(self) -> t.Optional[exp.Expression]: 5638 if self._match_set(self.STRING_PARSERS): 5639 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 5640 return self._parse_placeholder() 5641 5642 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 5643 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 5644 5645 def _parse_number(self) -> t.Optional[exp.Expression]: 5646 if self._match_set(self.NUMERIC_PARSERS): 5647 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 5648 return self._parse_placeholder() 5649 5650 def _parse_identifier(self) -> t.Optional[exp.Expression]: 5651 if self._match(TokenType.IDENTIFIER): 5652 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 5653 return self._parse_placeholder() 5654 5655 def _parse_var( 5656 self, 5657 any_token: bool = False, 5658 tokens: t.Optional[t.Collection[TokenType]] = None, 5659 upper: bool = False, 5660 ) -> t.Optional[exp.Expression]: 5661 if ( 5662 (any_token and self._advance_any()) 5663 or self._match(TokenType.VAR) 5664 or (self._match_set(tokens) if tokens else False) 5665 ): 5666 return self.expression( 5667 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 5668 ) 5669 return self._parse_placeholder() 5670 5671 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 5672 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 5673 self._advance() 5674 return self._prev 5675 return None 5676 5677 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 5678 return self._parse_var() or self._parse_string() 5679 5680 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 5681 return self._parse_primary() or self._parse_var(any_token=True) 5682 5683 def _parse_null(self) -> t.Optional[exp.Expression]: 5684 if self._match_set(self.NULL_TOKENS): 5685 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 5686 return self._parse_placeholder() 5687 5688 def _parse_boolean(self) -> t.Optional[exp.Expression]: 5689 if self._match(TokenType.TRUE): 5690 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 5691 if self._match(TokenType.FALSE): 5692 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 5693 return self._parse_placeholder() 5694 5695 def _parse_star(self) -> t.Optional[exp.Expression]: 5696 if self._match(TokenType.STAR): 5697 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 5698 return self._parse_placeholder() 5699 5700 def _parse_parameter(self) -> exp.Parameter: 5701 this = self._parse_identifier() or self._parse_primary_or_var() 5702 return self.expression(exp.Parameter, this=this) 5703 5704 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 5705 if self._match_set(self.PLACEHOLDER_PARSERS): 5706 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 5707 if placeholder: 5708 return placeholder 5709 self._advance(-1) 5710 return None 5711 5712 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 5713 if not self._match_texts(keywords): 5714 return None 5715 if self._match(TokenType.L_PAREN, advance=False): 5716 return 
self._parse_wrapped_csv(self._parse_expression) 5717 5718 expression = self._parse_expression() 5719 return [expression] if expression else None 5720 5721 def _parse_csv( 5722 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 5723 ) -> t.List[exp.Expression]: 5724 parse_result = parse_method() 5725 items = [parse_result] if parse_result is not None else [] 5726 5727 while self._match(sep): 5728 self._add_comments(parse_result) 5729 parse_result = parse_method() 5730 if parse_result is not None: 5731 items.append(parse_result) 5732 5733 return items 5734 5735 def _parse_tokens( 5736 self, parse_method: t.Callable, expressions: t.Dict 5737 ) -> t.Optional[exp.Expression]: 5738 this = parse_method() 5739 5740 while self._match_set(expressions): 5741 this = self.expression( 5742 expressions[self._prev.token_type], 5743 this=this, 5744 comments=self._prev_comments, 5745 expression=parse_method(), 5746 ) 5747 5748 return this 5749 5750 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 5751 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 5752 5753 def _parse_wrapped_csv( 5754 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 5755 ) -> t.List[exp.Expression]: 5756 return self._parse_wrapped( 5757 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 5758 ) 5759 5760 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 5761 wrapped = self._match(TokenType.L_PAREN) 5762 if not wrapped and not optional: 5763 self.raise_error("Expecting (") 5764 parse_result = parse_method() 5765 if wrapped: 5766 self._match_r_paren() 5767 return parse_result 5768 5769 def _parse_expressions(self) -> t.List[exp.Expression]: 5770 return self._parse_csv(self._parse_expression) 5771 5772 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 5773 return self._parse_select() or self._parse_set_operations( 5774 self._parse_expression() if alias else self._parse_conjunction() 5775 ) 5776 5777 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 5778 return self._parse_query_modifiers( 5779 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 5780 ) 5781 5782 def _parse_transaction(self) -> exp.Transaction | exp.Command: 5783 this = None 5784 if self._match_texts(self.TRANSACTION_KIND): 5785 this = self._prev.text 5786 5787 self._match_texts(("TRANSACTION", "WORK")) 5788 5789 modes = [] 5790 while True: 5791 mode = [] 5792 while self._match(TokenType.VAR): 5793 mode.append(self._prev.text) 5794 5795 if mode: 5796 modes.append(" ".join(mode)) 5797 if not self._match(TokenType.COMMA): 5798 break 5799 5800 return self.expression(exp.Transaction, this=this, modes=modes) 5801 5802 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 5803 chain = None 5804 savepoint = None 5805 is_rollback = self._prev.token_type == TokenType.ROLLBACK 5806 5807 self._match_texts(("TRANSACTION", "WORK")) 5808 5809 if self._match_text_seq("TO"): 5810 self._match_text_seq("SAVEPOINT") 5811 savepoint = self._parse_id_var() 5812 5813 if self._match(TokenType.AND): 5814 chain = not self._match_text_seq("NO") 5815 self._match_text_seq("CHAIN") 5816 5817 if is_rollback: 5818 return self.expression(exp.Rollback, savepoint=savepoint) 5819 5820 return self.expression(exp.Commit, chain=chain) 5821 5822 def _parse_refresh(self) -> exp.Refresh: 5823 self._match(TokenType.TABLE) 5824 return self.expression(exp.Refresh, 
this=self._parse_string() or self._parse_table()) 5825 5826 def _parse_add_column(self) -> t.Optional[exp.Expression]: 5827 if not self._match_text_seq("ADD"): 5828 return None 5829 5830 self._match(TokenType.COLUMN) 5831 exists_column = self._parse_exists(not_=True) 5832 expression = self._parse_field_def() 5833 5834 if expression: 5835 expression.set("exists", exists_column) 5836 5837 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 5838 if self._match_texts(("FIRST", "AFTER")): 5839 position = self._prev.text 5840 column_position = self.expression( 5841 exp.ColumnPosition, this=self._parse_column(), position=position 5842 ) 5843 expression.set("position", column_position) 5844 5845 return expression 5846 5847 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 5848 drop = self._match(TokenType.DROP) and self._parse_drop() 5849 if drop and not isinstance(drop, exp.Command): 5850 drop.set("kind", drop.args.get("kind", "COLUMN")) 5851 return drop 5852 5853 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 5854 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 5855 return self.expression( 5856 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 5857 ) 5858 5859 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 5860 index = self._index - 1 5861 5862 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 5863 return self._parse_csv( 5864 lambda: self.expression( 5865 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 5866 ) 5867 ) 5868 5869 self._retreat(index) 5870 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 5871 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 5872 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 5873 5874 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 5875 if self._match_texts(self.ALTER_ALTER_PARSERS): 5876 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 5877 5878 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 5879 # keyword after ALTER we default to parsing this statement 5880 self._match(TokenType.COLUMN) 5881 column = self._parse_field(any_token=True) 5882 5883 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 5884 return self.expression(exp.AlterColumn, this=column, drop=True) 5885 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 5886 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 5887 if self._match(TokenType.COMMENT): 5888 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 5889 5890 self._match_text_seq("SET", "DATA") 5891 self._match_text_seq("TYPE") 5892 return self.expression( 5893 exp.AlterColumn, 5894 this=column, 5895 dtype=self._parse_types(), 5896 collate=self._match(TokenType.COLLATE) and self._parse_term(), 5897 using=self._match(TokenType.USING) and self._parse_conjunction(), 5898 ) 5899 5900 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 5901 if self._match_texts(("ALL", "EVEN", "AUTO")): 5902 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 5903 5904 self._match_text_seq("KEY", "DISTKEY") 5905 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 5906 5907 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 5908 if compound: 5909 
self._match_text_seq("SORTKEY") 5910 5911 if self._match(TokenType.L_PAREN, advance=False): 5912 return self.expression( 5913 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 5914 ) 5915 5916 self._match_texts(("AUTO", "NONE")) 5917 return self.expression( 5918 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 5919 ) 5920 5921 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 5922 index = self._index - 1 5923 5924 partition_exists = self._parse_exists() 5925 if self._match(TokenType.PARTITION, advance=False): 5926 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 5927 5928 self._retreat(index) 5929 return self._parse_csv(self._parse_drop_column) 5930 5931 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 5932 if self._match(TokenType.COLUMN): 5933 exists = self._parse_exists() 5934 old_column = self._parse_column() 5935 to = self._match_text_seq("TO") 5936 new_column = self._parse_column() 5937 5938 if old_column is None or to is None or new_column is None: 5939 return None 5940 5941 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 5942 5943 self._match_text_seq("TO") 5944 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 5945 5946 def _parse_alter(self) -> exp.AlterTable | exp.Command: 5947 start = self._prev 5948 5949 if not self._match(TokenType.TABLE): 5950 return self._parse_as_command(start) 5951 5952 exists = self._parse_exists() 5953 only = self._match_text_seq("ONLY") 5954 this = self._parse_table(schema=True) 5955 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 5956 5957 if self._next: 5958 self._advance() 5959 5960 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 5961 if parser: 5962 actions = ensure_list(parser(self)) 5963 options = self._parse_csv(self._parse_property) 5964 5965 if not self._curr and actions: 5966 return self.expression( 5967 exp.AlterTable, 5968 this=this, 5969 exists=exists, 5970 actions=actions, 5971 only=only, 5972 options=options, 5973 cluster=cluster, 5974 ) 5975 5976 return self._parse_as_command(start) 5977 5978 def _parse_merge(self) -> exp.Merge: 5979 self._match(TokenType.INTO) 5980 target = self._parse_table() 5981 5982 if target and self._match(TokenType.ALIAS, advance=False): 5983 target.set("alias", self._parse_table_alias()) 5984 5985 self._match(TokenType.USING) 5986 using = self._parse_table() 5987 5988 self._match(TokenType.ON) 5989 on = self._parse_conjunction() 5990 5991 return self.expression( 5992 exp.Merge, 5993 this=target, 5994 using=using, 5995 on=on, 5996 expressions=self._parse_when_matched(), 5997 ) 5998 5999 def _parse_when_matched(self) -> t.List[exp.When]: 6000 whens = [] 6001 6002 while self._match(TokenType.WHEN): 6003 matched = not self._match(TokenType.NOT) 6004 self._match_text_seq("MATCHED") 6005 source = ( 6006 False 6007 if self._match_text_seq("BY", "TARGET") 6008 else self._match_text_seq("BY", "SOURCE") 6009 ) 6010 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 6011 6012 self._match(TokenType.THEN) 6013 6014 if self._match(TokenType.INSERT): 6015 _this = self._parse_star() 6016 if _this: 6017 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 6018 else: 6019 then = self.expression( 6020 exp.Insert, 6021 this=self._parse_value(), 6022 expression=self._match_text_seq("VALUES") and self._parse_value(), 6023 ) 
6024 elif self._match(TokenType.UPDATE): 6025 expressions = self._parse_star() 6026 if expressions: 6027 then = self.expression(exp.Update, expressions=expressions) 6028 else: 6029 then = self.expression( 6030 exp.Update, 6031 expressions=self._match(TokenType.SET) 6032 and self._parse_csv(self._parse_equality), 6033 ) 6034 elif self._match(TokenType.DELETE): 6035 then = self.expression(exp.Var, this=self._prev.text) 6036 else: 6037 then = None 6038 6039 whens.append( 6040 self.expression( 6041 exp.When, 6042 matched=matched, 6043 source=source, 6044 condition=condition, 6045 then=then, 6046 ) 6047 ) 6048 return whens 6049 6050 def _parse_show(self) -> t.Optional[exp.Expression]: 6051 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6052 if parser: 6053 return parser(self) 6054 return self._parse_as_command(self._prev) 6055 6056 def _parse_set_item_assignment( 6057 self, kind: t.Optional[str] = None 6058 ) -> t.Optional[exp.Expression]: 6059 index = self._index 6060 6061 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 6062 return self._parse_set_transaction(global_=kind == "GLOBAL") 6063 6064 left = self._parse_primary() or self._parse_column() 6065 assignment_delimiter = self._match_texts(("=", "TO")) 6066 6067 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6068 self._retreat(index) 6069 return None 6070 6071 right = self._parse_statement() or self._parse_id_var() 6072 this = self.expression(exp.EQ, this=left, expression=right) 6073 6074 return self.expression(exp.SetItem, this=this, kind=kind) 6075 6076 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6077 self._match_text_seq("TRANSACTION") 6078 characteristics = self._parse_csv( 6079 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6080 ) 6081 return self.expression( 6082 exp.SetItem, 6083 expressions=characteristics, 6084 kind="TRANSACTION", 6085 **{"global": global_}, # type: ignore 6086 ) 6087 6088 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6089 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6090 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6091 6092 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6093 index = self._index 6094 set_ = self.expression( 6095 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6096 ) 6097 6098 if self._curr: 6099 self._retreat(index) 6100 return self._parse_as_command(self._prev) 6101 6102 return set_ 6103 6104 def _parse_var_from_options( 6105 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6106 ) -> t.Optional[exp.Var]: 6107 start = self._curr 6108 if not start: 6109 return None 6110 6111 option = start.text.upper() 6112 continuations = options.get(option) 6113 6114 index = self._index 6115 self._advance() 6116 for keywords in continuations or []: 6117 if isinstance(keywords, str): 6118 keywords = (keywords,) 6119 6120 if self._match_text_seq(*keywords): 6121 option = f"{option} {' '.join(keywords)}" 6122 break 6123 else: 6124 if continuations or continuations is None: 6125 if raise_unmatched: 6126 self.raise_error(f"Unknown option {option}") 6127 6128 self._retreat(index) 6129 return None 6130 6131 return exp.var(option) 6132 6133 def _parse_as_command(self, start: Token) -> exp.Command: 6134 while self._curr: 6135 self._advance() 6136 text = self._find_sql(start, self._prev) 6137 size = len(start.text) 6138 self._warn_unsupported() 6139 
return exp.Command(this=text[:size], expression=text[size:]) 6140 6141 def _parse_dict_property(self, this: str) -> exp.DictProperty: 6142 settings = [] 6143 6144 self._match_l_paren() 6145 kind = self._parse_id_var() 6146 6147 if self._match(TokenType.L_PAREN): 6148 while True: 6149 key = self._parse_id_var() 6150 value = self._parse_primary() 6151 6152 if not key and value is None: 6153 break 6154 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 6155 self._match(TokenType.R_PAREN) 6156 6157 self._match_r_paren() 6158 6159 return self.expression( 6160 exp.DictProperty, 6161 this=this, 6162 kind=kind.this if kind else None, 6163 settings=settings, 6164 ) 6165 6166 def _parse_dict_range(self, this: str) -> exp.DictRange: 6167 self._match_l_paren() 6168 has_min = self._match_text_seq("MIN") 6169 if has_min: 6170 min = self._parse_var() or self._parse_primary() 6171 self._match_text_seq("MAX") 6172 max = self._parse_var() or self._parse_primary() 6173 else: 6174 max = self._parse_var() or self._parse_primary() 6175 min = exp.Literal.number(0) 6176 self._match_r_paren() 6177 return self.expression(exp.DictRange, this=this, min=min, max=max) 6178 6179 def _parse_comprehension( 6180 self, this: t.Optional[exp.Expression] 6181 ) -> t.Optional[exp.Comprehension]: 6182 index = self._index 6183 expression = self._parse_column() 6184 if not self._match(TokenType.IN): 6185 self._retreat(index - 1) 6186 return None 6187 iterator = self._parse_column() 6188 condition = self._parse_conjunction() if self._match_text_seq("IF") else None 6189 return self.expression( 6190 exp.Comprehension, 6191 this=this, 6192 expression=expression, 6193 iterator=iterator, 6194 condition=condition, 6195 ) 6196 6197 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 6198 if self._match(TokenType.HEREDOC_STRING): 6199 return self.expression(exp.Heredoc, this=self._prev.text) 6200 6201 if not self._match_text_seq("$"): 6202 return None 6203 6204 tags = ["$"] 6205 tag_text = None 6206 6207 if self._is_connected(): 6208 self._advance() 6209 tags.append(self._prev.text.upper()) 6210 else: 6211 self.raise_error("No closing $ found") 6212 6213 if tags[-1] != "$": 6214 if self._is_connected() and self._match_text_seq("$"): 6215 tag_text = tags[-1] 6216 tags.append("$") 6217 else: 6218 self.raise_error("No closing $ found") 6219 6220 heredoc_start = self._curr 6221 6222 while self._curr: 6223 if self._match_text_seq(*tags, advance=False): 6224 this = self._find_sql(heredoc_start, self._prev) 6225 self._advance(len(tags)) 6226 return self.expression(exp.Heredoc, this=this, tag=tag_text) 6227 6228 self._advance() 6229 6230 self.raise_error(f"No closing {''.join(tags)} found") 6231 return None 6232 6233 def _find_parser( 6234 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 6235 ) -> t.Optional[t.Callable]: 6236 if not self._curr: 6237 return None 6238 6239 index = self._index 6240 this = [] 6241 while True: 6242 # The current token might be multiple words 6243 curr = self._curr.text.upper() 6244 key = curr.split(" ") 6245 this.append(curr) 6246 6247 self._advance() 6248 result, trie = in_trie(trie, key) 6249 if result == TrieResult.FAILED: 6250 break 6251 6252 if result == TrieResult.EXISTS: 6253 subparser = parsers[" ".join(this)] 6254 return subparser 6255 6256 self._retreat(index) 6257 return None 6258 6259 def _match(self, token_type, advance=True, expression=None): 6260 if not self._curr: 6261 return None 6262 6263 if self._curr.token_type == token_type: 6264 if advance: 6265 self._advance() 6266 
self._add_comments(expression) 6267 return True 6268 6269 return None 6270 6271 def _match_set(self, types, advance=True): 6272 if not self._curr: 6273 return None 6274 6275 if self._curr.token_type in types: 6276 if advance: 6277 self._advance() 6278 return True 6279 6280 return None 6281 6282 def _match_pair(self, token_type_a, token_type_b, advance=True): 6283 if not self._curr or not self._next: 6284 return None 6285 6286 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 6287 if advance: 6288 self._advance(2) 6289 return True 6290 6291 return None 6292 6293 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6294 if not self._match(TokenType.L_PAREN, expression=expression): 6295 self.raise_error("Expecting (") 6296 6297 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 6298 if not self._match(TokenType.R_PAREN, expression=expression): 6299 self.raise_error("Expecting )") 6300 6301 def _match_texts(self, texts, advance=True): 6302 if self._curr and self._curr.text.upper() in texts: 6303 if advance: 6304 self._advance() 6305 return True 6306 return None 6307 6308 def _match_text_seq(self, *texts, advance=True): 6309 index = self._index 6310 for text in texts: 6311 if self._curr and self._curr.text.upper() == text: 6312 self._advance() 6313 else: 6314 self._retreat(index) 6315 return None 6316 6317 if not advance: 6318 self._retreat(index) 6319 6320 return True 6321 6322 def _replace_lambda( 6323 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 6324 ) -> t.Optional[exp.Expression]: 6325 if not node: 6326 return node 6327 6328 for column in node.find_all(exp.Column): 6329 if column.parts[0].name in lambda_variables: 6330 dot_or_id = column.to_dot() if column.table else column.this 6331 parent = column.parent 6332 6333 while isinstance(parent, exp.Dot): 6334 if not isinstance(parent.parent, exp.Dot): 6335 parent.replace(dot_or_id) 6336 break 6337 parent = parent.parent 6338 else: 6339 if column is node: 6340 node = dot_or_id 6341 else: 6342 column.replace(dot_or_id) 6343 return node 6344 6345 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 6346 start = self._prev 6347 6348 # Not to be confused with TRUNCATE(number, decimals) function call 6349 if self._match(TokenType.L_PAREN): 6350 self._retreat(self._index - 2) 6351 return self._parse_function() 6352 6353 # Clickhouse supports TRUNCATE DATABASE as well 6354 is_database = self._match(TokenType.DATABASE) 6355 6356 self._match(TokenType.TABLE) 6357 6358 exists = self._parse_exists(not_=False) 6359 6360 expressions = self._parse_csv( 6361 lambda: self._parse_table(schema=True, is_db_reference=is_database) 6362 ) 6363 6364 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6365 6366 if self._match_text_seq("RESTART", "IDENTITY"): 6367 identity = "RESTART" 6368 elif self._match_text_seq("CONTINUE", "IDENTITY"): 6369 identity = "CONTINUE" 6370 else: 6371 identity = None 6372 6373 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 6374 option = self._prev.text 6375 else: 6376 option = None 6377 6378 partition = self._parse_partition() 6379 6380 # Fallback case 6381 if self._curr: 6382 return self._parse_as_command(start) 6383 6384 return self.expression( 6385 exp.TruncateTable, 6386 expressions=expressions, 6387 is_database=is_database, 6388 exists=exists, 6389 cluster=cluster, 6390 identity=identity, 6391 option=option, 6392 partition=partition, 6393 ) 
6394 6395 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 6396 this = self._parse_ordered(self._parse_opclass) 6397 6398 if not self._match(TokenType.WITH): 6399 return this 6400 6401 op = self._parse_var(any_token=True) 6402 6403 return self.expression(exp.WithOperator, this=this, op=op) 6404 6405 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 6406 opts = [] 6407 self._match(TokenType.EQ) 6408 self._match(TokenType.L_PAREN) 6409 while self._curr and not self._match(TokenType.R_PAREN): 6410 opts.append(self._parse_conjunction()) 6411 self._match(TokenType.COMMA) 6412 return opts 6413 6414 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 6415 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 6416 6417 options = [] 6418 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 6419 option = self._parse_unquoted_field() 6420 value = None 6421 6422 # Some options are defined as functions with the values as params 6423 if not isinstance(option, exp.Func): 6424 prev = self._prev.text.upper() 6425 # Different dialects might separate options and values by white space, "=" and "AS" 6426 self._match(TokenType.EQ) 6427 self._match(TokenType.ALIAS) 6428 6429 if prev == "FILE_FORMAT" and self._match(TokenType.L_PAREN): 6430 # Snowflake FILE_FORMAT case 6431 value = self._parse_wrapped_options() 6432 else: 6433 value = self._parse_unquoted_field() 6434 6435 param = self.expression(exp.CopyParameter, this=option, expression=value) 6436 options.append(param) 6437 6438 if sep: 6439 self._match(sep) 6440 6441 return options 6442 6443 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 6444 expr = self.expression(exp.Credentials) 6445 6446 if self._match_text_seq("STORAGE_INTEGRATION", advance=False): 6447 expr.set("storage", self._parse_conjunction()) 6448 if self._match_text_seq("CREDENTIALS"): 6449 # Snowflake supports CREDENTIALS = (...), while Redshift CREDENTIALS <string> 6450 creds = ( 6451 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 6452 ) 6453 expr.set("credentials", creds) 6454 if self._match_text_seq("ENCRYPTION"): 6455 expr.set("encryption", self._parse_wrapped_options()) 6456 if self._match_text_seq("IAM_ROLE"): 6457 expr.set("iam_role", self._parse_field()) 6458 if self._match_text_seq("REGION"): 6459 expr.set("region", self._parse_field()) 6460 6461 return expr 6462 6463 def _parse_file_location(self) -> t.Optional[exp.Expression]: 6464 return self._parse_field() 6465 6466 def _parse_copy(self) -> exp.Copy | exp.Command: 6467 start = self._prev 6468 6469 self._match(TokenType.INTO) 6470 6471 this = ( 6472 self._parse_conjunction() 6473 if self._match(TokenType.L_PAREN, advance=False) 6474 else self._parse_table(schema=True) 6475 ) 6476 6477 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 6478 6479 files = self._parse_csv(self._parse_file_location) 6480 credentials = self._parse_credentials() 6481 6482 self._match_text_seq("WITH") 6483 6484 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 6485 6486 # Fallback case 6487 if self._curr: 6488 return self._parse_as_command(start) 6489 6490 return self.expression( 6491 exp.Copy, 6492 this=this, 6493 kind=kind, 6494 credentials=credentials, 6495 files=files, 6496 params=params, 6497 )
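The MERGE machinery above (_parse_merge and _parse_when_matched) produces an exp.Merge node whose expressions are exp.When branches. A minimal sketch of inspecting such a tree through the public parse_one helper; the table names and WHEN clauses are illustrative:

    import sqlglot
    from sqlglot import exp

    tree = sqlglot.parse_one(
        "MERGE INTO t USING s ON t.id = s.id "
        "WHEN MATCHED THEN UPDATE SET t.v = s.v "
        "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
    )
    assert isinstance(tree, exp.Merge)
    for when in tree.expressions:
        # matched is False for the WHEN NOT MATCHED branch; then holds the action node
        print(when.args.get("matched"), type(when.args.get("then")).__name__)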
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
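A minimal sketch of constructing a Parser directly and feeding it tokens produced by the Tokenizer; the query string and option values are illustrative:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    parser = Parser(error_level=ErrorLevel.RAISE, error_message_context=50, max_errors=1)
    sql = "SELECT a, b FROM t"
    # parse() returns one syntax tree per statement in the token stream
    trees = parser.parse(Tokenizer().tokenize(sql), sql)
    print(trees[0].sql())  # SELECT a, b FROM t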
1194 def __init__( 1195 self, 1196 error_level: t.Optional[ErrorLevel] = None, 1197 error_message_context: int = 100, 1198 max_errors: int = 3, 1199 dialect: DialectType = None, 1200 ): 1201 from sqlglot.dialects import Dialect 1202 1203 self.error_level = error_level or ErrorLevel.IMMEDIATE 1204 self.error_message_context = error_message_context 1205 self.max_errors = max_errors 1206 self.dialect = Dialect.get_or_raise(dialect) 1207 self.reset()
1219 def parse( 1220 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1221 ) -> t.List[t.Optional[exp.Expression]]: 1222 """ 1223 Parses a list of tokens and returns a list of syntax trees, one tree 1224 per parsed SQL statement. 1225 1226 Args: 1227 raw_tokens: The list of tokens. 1228 sql: The original SQL string, used to produce helpful debug messages. 1229 1230 Returns: 1231 The list of the produced syntax trees. 1232 """ 1233 return self._parse( 1234 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1235 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of produced syntax trees.
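For example, a sketch of parsing a multi-statement token stream; the statements are illustrative:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1; SELECT 2"
    trees = Parser().parse(Tokenizer().tokenize(sql), sql)
    print([tree.sql() for tree in trees])  # ['SELECT 1', 'SELECT 2']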
1237 def parse_into( 1238 self, 1239 expression_types: exp.IntoType, 1240 raw_tokens: t.List[Token], 1241 sql: t.Optional[str] = None, 1242 ) -> t.List[t.Optional[exp.Expression]]: 1243 """ 1244 Parses a list of tokens into a given Expression type. If a collection of Expression 1245 types is given instead, this method will try to parse the token list into each one 1246 of them, stopping at the first for which the parsing succeeds. 1247 1248 Args: 1249 expression_types: The expression type(s) to try and parse the token list into. 1250 raw_tokens: The list of tokens. 1251 sql: The original SQL string, used to produce helpful debug messages. 1252 1253 Returns: 1254 The target Expression. 1255 """ 1256 errors = [] 1257 for expression_type in ensure_list(expression_types): 1258 parser = self.EXPRESSION_PARSERS.get(expression_type) 1259 if not parser: 1260 raise TypeError(f"No parser registered for {expression_type}") 1261 1262 try: 1263 return self._parse(parser, raw_tokens, sql) 1264 except ParseError as e: 1265 e.errors[0]["into_expression"] = expression_type 1266 errors.append(e) 1267 1268 raise ParseError( 1269 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1270 errors=merge_errors(errors), 1271 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try to parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
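A sketch of both the direct call and the higher-level sqlglot.parse_one(..., into=...) route, which delegates to parse_into; this assumes exp.Select is registered in EXPRESSION_PARSERS, as it is for the base parser:

    from sqlglot import exp, parse_one
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT a FROM t"
    select = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql)[0]
    assert isinstance(select, exp.Select)

    # the high-level helper goes through the same machinery
    assert parse_one(sql, into=exp.Select).sql() == select.sql()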
1311 def check_errors(self) -> None: 1312 """Logs or raises any found errors, depending on the chosen error level setting.""" 1313 if self.error_level == ErrorLevel.WARN: 1314 for error in self.errors: 1315 logger.error(str(error)) 1316 elif self.error_level == ErrorLevel.RAISE and self.errors: 1317 raise ParseError( 1318 concat_messages(self.errors, self.max_errors), 1319 errors=merge_errors(self.errors), 1320 )
Logs or raises any errors found, depending on the chosen error level setting.
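Under ErrorLevel.WARN, errors accumulate on parser.errors and check_errors logs them via logger.error instead of raising; a sketch with a deliberately malformed statement:

    from sqlglot.errors import ErrorLevel
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    parser = Parser(error_level=ErrorLevel.WARN)
    sql = "SELECT 1 +"  # dangling operator, so a mandatory argument is missing
    parser.parse(Tokenizer().tokenize(sql), sql)
    print(len(parser.errors))  # at least one recorded ParseError
    parser.check_errors()      # logs the errors rather than raising under WARN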
1322 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1323 """ 1324 Appends an error in the list of recorded errors or raises it, depending on the chosen 1325 error level setting. 1326 """ 1327 token = token or self._curr or self._prev or Token.string("") 1328 start = token.start 1329 end = token.end + 1 1330 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1331 highlight = self.sql[start:end] 1332 end_context = self.sql[end : end + self.error_message_context] 1333 1334 error = ParseError.new( 1335 f"{message}. Line {token.line}, Col: {token.col}.\n" 1336 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1337 description=message, 1338 line=token.line, 1339 col=token.col, 1340 start_context=start_context, 1341 highlight=highlight, 1342 end_context=end_context, 1343 ) 1344 1345 if self.error_level == ErrorLevel.IMMEDIATE: 1346 raise error 1347 1348 self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
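Under the default ErrorLevel.IMMEDIATE the error is raised at once as a ParseError carrying the structured fields built above; a sketch of catching one:

    from sqlglot import parse_one
    from sqlglot.errors import ParseError

    try:
        parse_one("SELECT 1 +")  # dangling operator
    except ParseError as e:
        err = e.errors[0]
        print(err["description"], err["line"], err["col"])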
1350 def expression( 1351 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1352 ) -> E: 1353 """ 1354 Creates a new, validated Expression. 1355 1356 Args: 1357 exp_class: The expression class to instantiate. 1358 comments: An optional list of comments to attach to the expression. 1359 kwargs: The arguments to set for the expression along with their respective values. 1360 1361 Returns: 1362 The target expression. 1363 """ 1364 instance = exp_class(**kwargs) 1365 instance.add_comments(comments) if comments else self._add_comments(instance) 1366 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
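The dialect parsers call this builder throughout, but it can also be invoked directly; a minimal sketch using the exp.column and exp.Literal helpers:

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()
    node = parser.expression(exp.EQ, this=exp.column("a"), expression=exp.Literal.number(1))
    print(node.sql())  # a = 1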
1373 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1374 """ 1375 Validates an Expression, making sure that all its mandatory arguments are set. 1376 1377 Args: 1378 expression: The expression to validate. 1379 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1380 1381 Returns: 1382 The validated expression. 1383 """ 1384 if self.error_level != ErrorLevel.IGNORE: 1385 for error_message in expression.error_messages(args): 1386 self.raise_error(error_message) 1387 1388 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
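Validation failures route through raise_error, so the outcome depends on error_level; under the default IMMEDIATE level a missing mandatory argument raises at once. A sketch, using an exp.EQ node (a Binary, so both this and expression are mandatory) constructed empty:

    from sqlglot import exp
    from sqlglot.errors import ParseError
    from sqlglot.parser import Parser

    parser = Parser()  # error_level defaults to ErrorLevel.IMMEDIATE
    try:
        parser.validate_expression(exp.EQ())  # both this and expression are missing
    except ParseError as e:
        print(e)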