From 64106c4d3d4ddba8c7bc2af75376e6d3d3d75601 Mon Sep 17 00:00:00 2001
From:
Date: Mon, 29 Jun 2015 20:16:15 +0000
Subject: Update documentation
---
src/regex_syntax/lib.rs.html | 2465 ++++++++++++++++++++
src/regex_syntax/parser.rs.html | 4751 +++++++++++++++++++++++++++++++++++++++
2 files changed, 7216 insertions(+)
create mode 100644 src/regex_syntax/lib.rs.html
create mode 100644 src/regex_syntax/parser.rs.html
(limited to 'src/regex_syntax')
diff --git a/src/regex_syntax/lib.rs.html b/src/regex_syntax/lib.rs.html
new file mode 100644
index 0000000..7c8b82b
--- /dev/null
+++ b/src/regex_syntax/lib.rs.html
@@ -0,0 +1,2465 @@
+
+
+
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+ 10
+ 11
+ 12
+ 13
+ 14
+ 15
+ 16
+ 17
+ 18
+ 19
+ 20
+ 21
+ 22
+ 23
+ 24
+ 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+ 100
+ 101
+ 102
+ 103
+ 104
+ 105
+ 106
+ 107
+ 108
+ 109
+ 110
+ 111
+ 112
+ 113
+ 114
+ 115
+ 116
+ 117
+ 118
+ 119
+ 120
+ 121
+ 122
+ 123
+ 124
+ 125
+ 126
+ 127
+ 128
+ 129
+ 130
+ 131
+ 132
+ 133
+ 134
+ 135
+ 136
+ 137
+ 138
+ 139
+ 140
+ 141
+ 142
+ 143
+ 144
+ 145
+ 146
+ 147
+ 148
+ 149
+ 150
+ 151
+ 152
+ 153
+ 154
+ 155
+ 156
+ 157
+ 158
+ 159
+ 160
+ 161
+ 162
+ 163
+ 164
+ 165
+ 166
+ 167
+ 168
+ 169
+ 170
+ 171
+ 172
+ 173
+ 174
+ 175
+ 176
+ 177
+ 178
+ 179
+ 180
+ 181
+ 182
+ 183
+ 184
+ 185
+ 186
+ 187
+ 188
+ 189
+ 190
+ 191
+ 192
+ 193
+ 194
+ 195
+ 196
+ 197
+ 198
+ 199
+ 200
+ 201
+ 202
+ 203
+ 204
+ 205
+ 206
+ 207
+ 208
+ 209
+ 210
+ 211
+ 212
+ 213
+ 214
+ 215
+ 216
+ 217
+ 218
+ 219
+ 220
+ 221
+ 222
+ 223
+ 224
+ 225
+ 226
+ 227
+ 228
+ 229
+ 230
+ 231
+ 232
+ 233
+ 234
+ 235
+ 236
+ 237
+ 238
+ 239
+ 240
+ 241
+ 242
+ 243
+ 244
+ 245
+ 246
+ 247
+ 248
+ 249
+ 250
+ 251
+ 252
+ 253
+ 254
+ 255
+ 256
+ 257
+ 258
+ 259
+ 260
+ 261
+ 262
+ 263
+ 264
+ 265
+ 266
+ 267
+ 268
+ 269
+ 270
+ 271
+ 272
+ 273
+ 274
+ 275
+ 276
+ 277
+ 278
+ 279
+ 280
+ 281
+ 282
+ 283
+ 284
+ 285
+ 286
+ 287
+ 288
+ 289
+ 290
+ 291
+ 292
+ 293
+ 294
+ 295
+ 296
+ 297
+ 298
+ 299
+ 300
+ 301
+ 302
+ 303
+ 304
+ 305
+ 306
+ 307
+ 308
+ 309
+ 310
+ 311
+ 312
+ 313
+ 314
+ 315
+ 316
+ 317
+ 318
+ 319
+ 320
+ 321
+ 322
+ 323
+ 324
+ 325
+ 326
+ 327
+ 328
+ 329
+ 330
+ 331
+ 332
+ 333
+ 334
+ 335
+ 336
+ 337
+ 338
+ 339
+ 340
+ 341
+ 342
+ 343
+ 344
+ 345
+ 346
+ 347
+ 348
+ 349
+ 350
+ 351
+ 352
+ 353
+ 354
+ 355
+ 356
+ 357
+ 358
+ 359
+ 360
+ 361
+ 362
+ 363
+ 364
+ 365
+ 366
+ 367
+ 368
+ 369
+ 370
+ 371
+ 372
+ 373
+ 374
+ 375
+ 376
+ 377
+ 378
+ 379
+ 380
+ 381
+ 382
+ 383
+ 384
+ 385
+ 386
+ 387
+ 388
+ 389
+ 390
+ 391
+ 392
+ 393
+ 394
+ 395
+ 396
+ 397
+ 398
+ 399
+ 400
+ 401
+ 402
+ 403
+ 404
+ 405
+ 406
+ 407
+ 408
+ 409
+ 410
+ 411
+ 412
+ 413
+ 414
+ 415
+ 416
+ 417
+ 418
+ 419
+ 420
+ 421
+ 422
+ 423
+ 424
+ 425
+ 426
+ 427
+ 428
+ 429
+ 430
+ 431
+ 432
+ 433
+ 434
+ 435
+ 436
+ 437
+ 438
+ 439
+ 440
+ 441
+ 442
+ 443
+ 444
+ 445
+ 446
+ 447
+ 448
+ 449
+ 450
+ 451
+ 452
+ 453
+ 454
+ 455
+ 456
+ 457
+ 458
+ 459
+ 460
+ 461
+ 462
+ 463
+ 464
+ 465
+ 466
+ 467
+ 468
+ 469
+ 470
+ 471
+ 472
+ 473
+ 474
+ 475
+ 476
+ 477
+ 478
+ 479
+ 480
+ 481
+ 482
+ 483
+ 484
+ 485
+ 486
+ 487
+ 488
+ 489
+ 490
+ 491
+ 492
+ 493
+ 494
+ 495
+ 496
+ 497
+ 498
+ 499
+ 500
+ 501
+ 502
+ 503
+ 504
+ 505
+ 506
+ 507
+ 508
+ 509
+ 510
+ 511
+ 512
+ 513
+ 514
+ 515
+ 516
+ 517
+ 518
+ 519
+ 520
+ 521
+ 522
+ 523
+ 524
+ 525
+ 526
+ 527
+ 528
+ 529
+ 530
+ 531
+ 532
+ 533
+ 534
+ 535
+ 536
+ 537
+ 538
+ 539
+ 540
+ 541
+ 542
+ 543
+ 544
+ 545
+ 546
+ 547
+ 548
+ 549
+ 550
+ 551
+ 552
+ 553
+ 554
+ 555
+ 556
+ 557
+ 558
+ 559
+ 560
+ 561
+ 562
+ 563
+ 564
+ 565
+ 566
+ 567
+ 568
+ 569
+ 570
+ 571
+ 572
+ 573
+ 574
+ 575
+ 576
+ 577
+ 578
+ 579
+ 580
+ 581
+ 582
+ 583
+ 584
+ 585
+ 586
+ 587
+ 588
+ 589
+ 590
+ 591
+ 592
+ 593
+ 594
+ 595
+ 596
+ 597
+ 598
+ 599
+ 600
+ 601
+ 602
+ 603
+ 604
+ 605
+ 606
+ 607
+ 608
+ 609
+ 610
+ 611
+ 612
+ 613
+ 614
+ 615
+ 616
+ 617
+ 618
+ 619
+ 620
+ 621
+ 622
+ 623
+ 624
+ 625
+ 626
+ 627
+ 628
+ 629
+ 630
+ 631
+ 632
+ 633
+ 634
+ 635
+ 636
+ 637
+ 638
+ 639
+ 640
+ 641
+ 642
+ 643
+ 644
+ 645
+ 646
+ 647
+ 648
+ 649
+ 650
+ 651
+ 652
+ 653
+ 654
+ 655
+ 656
+ 657
+ 658
+ 659
+ 660
+ 661
+ 662
+ 663
+ 664
+ 665
+ 666
+ 667
+ 668
+ 669
+ 670
+ 671
+ 672
+ 673
+ 674
+ 675
+ 676
+ 677
+ 678
+ 679
+ 680
+ 681
+ 682
+ 683
+ 684
+ 685
+ 686
+ 687
+ 688
+ 689
+ 690
+ 691
+ 692
+ 693
+ 694
+ 695
+ 696
+ 697
+ 698
+ 699
+ 700
+ 701
+ 702
+ 703
+ 704
+ 705
+ 706
+ 707
+ 708
+ 709
+ 710
+ 711
+ 712
+ 713
+ 714
+ 715
+ 716
+ 717
+ 718
+ 719
+ 720
+ 721
+ 722
+ 723
+ 724
+ 725
+ 726
+ 727
+ 728
+ 729
+ 730
+ 731
+ 732
+ 733
+ 734
+ 735
+ 736
+ 737
+ 738
+ 739
+ 740
+ 741
+ 742
+ 743
+ 744
+ 745
+ 746
+ 747
+ 748
+ 749
+ 750
+ 751
+ 752
+ 753
+ 754
+ 755
+ 756
+ 757
+ 758
+ 759
+ 760
+ 761
+ 762
+ 763
+ 764
+ 765
+ 766
+ 767
+ 768
+ 769
+ 770
+ 771
+ 772
+ 773
+ 774
+ 775
+ 776
+ 777
+ 778
+ 779
+ 780
+ 781
+ 782
+ 783
+ 784
+ 785
+ 786
+ 787
+ 788
+ 789
+ 790
+ 791
+ 792
+ 793
+ 794
+ 795
+ 796
+ 797
+ 798
+ 799
+ 800
+ 801
+ 802
+ 803
+ 804
+ 805
+ 806
+ 807
+ 808
+ 809
+ 810
+ 811
+ 812
+ 813
+ 814
+ 815
+ 816
+ 817
+ 818
+ 819
+ 820
+ 821
+ 822
+ 823
+ 824
+ 825
+ 826
+ 827
+ 828
+ 829
+ 830
+ 831
+ 832
+ 833
+ 834
+ 835
+ 836
+ 837
+ 838
+ 839
+ 840
+ 841
+ 842
+ 843
+ 844
+ 845
+ 846
+ 847
+ 848
+ 849
+ 850
+ 851
+ 852
+ 853
+ 854
+ 855
+ 856
+ 857
+ 858
+ 859
+ 860
+ 861
+ 862
+ 863
+ 864
+ 865
+ 866
+ 867
+ 868
+ 869
+ 870
+ 871
+ 872
+ 873
+ 874
+ 875
+ 876
+ 877
+ 878
+ 879
+ 880
+ 881
+ 882
+ 883
+ 884
+ 885
+ 886
+ 887
+ 888
+ 889
+ 890
+ 891
+ 892
+ 893
+ 894
+ 895
+ 896
+ 897
+ 898
+ 899
+ 900
+ 901
+ 902
+ 903
+ 904
+ 905
+ 906
+ 907
+ 908
+ 909
+ 910
+ 911
+ 912
+ 913
+ 914
+ 915
+ 916
+ 917
+ 918
+ 919
+ 920
+ 921
+ 922
+ 923
+ 924
+ 925
+ 926
+ 927
+ 928
+ 929
+ 930
+ 931
+ 932
+ 933
+ 934
+ 935
+ 936
+ 937
+ 938
+ 939
+ 940
+ 941
+ 942
+ 943
+ 944
+ 945
+ 946
+ 947
+ 948
+ 949
+ 950
+ 951
+ 952
+ 953
+ 954
+ 955
+ 956
+ 957
+ 958
+ 959
+ 960
+ 961
+ 962
+ 963
+ 964
+ 965
+ 966
+ 967
+ 968
+ 969
+ 970
+ 971
+ 972
+ 973
+ 974
+ 975
+ 976
+ 977
+ 978
+ 979
+ 980
+ 981
+ 982
+ 983
+ 984
+ 985
+ 986
+ 987
+ 988
+ 989
+ 990
+ 991
+ 992
+ 993
+ 994
+ 995
+ 996
+ 997
+ 998
+ 999
+1000
+1001
+1002
+1003
+1004
+1005
+1006
+1007
+1008
+1009
+1010
+1011
+1012
+1013
+1014
+1015
+1016
+1017
+1018
+1019
+1020
+1021
+1022
+1023
+1024
+1025
+1026
+1027
+1028
+1029
+1030
+1031
+1032
+1033
+1034
+1035
+1036
+1037
+1038
+1039
+1040
+1041
+1042
+1043
+1044
+1045
+1046
+1047
+1048
+1049
+1050
+1051
+1052
+1053
+1054
+1055
+1056
+1057
+1058
+1059
+1060
+1061
+1062
+1063
+1064
+1065
+1066
+1067
+1068
+1069
+1070
+1071
+1072
+1073
+1074
+1075
+1076
+1077
+1078
+1079
+1080
+1081
+1082
+1083
+1084
+1085
+1086
+1087
+1088
+1089
+1090
+1091
+1092
+1093
+1094
+1095
+1096
+1097
+1098
+1099
+1100
+1101
+1102
+1103
+1104
+1105
+1106
+1107
+1108
+1109
+1110
+1111
+1112
+1113
+1114
+1115
+1116
+1117
+1118
+1119
+1120
+1121
+1122
+1123
+1124
+1125
+1126
+1127
+1128
+1129
+1130
+1131
+1132
+1133
+1134
+1135
+1136
+1137
+1138
+1139
+1140
+1141
+1142
+1143
+1144
+1145
+1146
+1147
+1148
+1149
+1150
+1151
+1152
+1153
+1154
+1155
+1156
+1157
+1158
+1159
+1160
+1161
+1162
+1163
+1164
+1165
+1166
+1167
+1168
+1169
+1170
+1171
+1172
+1173
+1174
+1175
+1176
+1177
+1178
+1179
+1180
+1181
+1182
+1183
+1184
+
+
+
+
+
+
+
+
+
+
+
+
+
+#![deny(missing_docs)]
+
+#[cfg(test)] extern crate quickcheck;
+#[cfg(test)] extern crate rand;
+
+mod parser;
+mod unicode;
+
+use std::char;
+use std::cmp::{Ordering, max, min};
+use std::fmt;
+use std::iter::IntoIterator;
+use std::ops::Deref;
+use std::slice;
+use std::vec;
+
+use unicode::case_folding;
+
+use self::Expr::*;
+use self::Repeater::*;
+
+pub use parser::is_punct;
+
+/// The abstract syntax of a regular expression.
+///
+/// Build one with `Expr::parse`; the `Display` impl writes it back out as a pattern.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum Expr {
+ /// An empty expression; displayed as the empty pattern.
+ Empty,
+ /// A sequence of literal characters.
+ Literal {
+ /// The characters making up the literal, in order.
+ chars: Vec<char>,
+ /// When true, the literal is displayed wrapped in `(?i:...)`.
+ casei: bool,
+ },
+ /// Displayed as `(?s:.)`.
+ AnyChar,
+ /// Displayed as `.`.
+ AnyCharNoNL,
+ /// A character class, i.e. a set of character ranges.
+ Class(CharClass),
+ /// Displayed as `(?m:^)`.
+ StartLine,
+ /// Displayed as `(?m:$)`.
+ EndLine,
+ /// Displayed as `^`.
+ StartText,
+ /// Displayed as `$`.
+ EndText,
+ /// A word boundary assertion.
+ /// Displayed as `\b`.
+ WordBoundary,
+ /// A negated word boundary assertion.
+ /// Displayed as `\B`.
+ NotWordBoundary,
+ /// A group around a sub-expression.
+ Group {
+ /// The grouped sub-expression.
+ e: Box<Expr>,
+ /// The capture index, if any. With no index and no name the group displays as `(?:...)`.
+ i: Option<usize>,
+ /// The capture name, if any; a named group displays as `(?P<name>...)`.
+ name: Option<String>,
+ },
+ /// A repetition operator applied to a sub-expression.
+ Repeat {
+ /// The expression being repeated. `Display` wraps a multi-character
+ /// literal in `(?:...)` so the operator covers all of it.
+ e: Box<Expr>,
+ /// The repetition operator.
+ r: Repeater,
+ /// Greediness flag. A non-greedy repetition is displayed with a
+ /// trailing `?`.
+ greedy: bool,
+ },
+ /// A sequence of sub-expressions, displayed back to back.
+ ///
+ /// `simplify` fuses neighbouring literals of equal case sensitivity and
+ /// replaces a one-element concatenation by its only element.
+ Concat(Vec<Expr>),
+ /// A list of alternatives, displayed joined by `|`.
+ ///
+ /// `simplify` simplifies every alternative but keeps the list itself
+ /// unchanged.
+ Alternate(Vec<Expr>),
+}
+
+type CaptureIndex = Option<usize>; // same type as `Expr::Group::i`
+// Same type as `Expr::Group::name`. NOTE(review): neither alias is used in this file; presumably the `parser` module uses them.
+type CaptureName = Option<String>;
+
+/// The repetition operator carried by `Expr::Repeat`.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum Repeater {
+ /// Displayed as `?`.
+ ZeroOrOne,
+ /// Displayed as `*`.
+ ZeroOrMore,
+ /// Displayed as `+`.
+ OneOrMore,
+ /// A counted repetition. Displayed as `{min,}` when `max` is `None`,
+ /// as `{n}` when `min` equals `max`, and as `{min, max}` (with a space
+ /// after the comma) otherwise.
+ Range {
+ /// Lower bound.
+ min: u32,
+ /// Upper bound; `None` is displayed as an open-ended `{min,}`.
+ max: Option<u32>,
+ },
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+/// A set of characters stored as a list of `ClassRange`s.
+///
+/// The ranges are reachable read-only through `Deref<Target=Vec<ClassRange>>`.
+/// `matches` binary-searches them, so it relies on the ranges being sorted and
+/// non-overlapping, which is the form `canonicalize` produces. `casei` records
+/// whether the class is case insensitive (see `is_case_insensitive`).
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct CharClass {
+ ranges: Vec<ClassRange>,
+ casei: bool,
+}
+
+
+
+
+
+/// An inclusive range of characters, `start` through `end`.
+///
+/// `ClassRange::new` swaps its arguments when needed so that `start <= end`.
+/// Comparing a range with a `char` tests containment (see the `PartialEq<char>` impl).
+#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Eq, Ord)]
+pub struct ClassRange {
+ /// The first character in the range.
+ ///
+ /// Inclusive: `start` itself is a member of the range.
+ pub start: char,
+
+ /// The last character in the range.
+ ///
+ /// Inclusive: `end` itself is a member of the range.
+ pub end: char,
+}
+
+impl Expr {
+ /// Parses `s` with `parser::Parser::parse` and returns the simplified expression.
+ pub fn parse(s: &str) -> Result<Expr> {
+ parser::Parser::parse(s).map(|e| e.simplify())
+ }
+
+ /// Returns true for every variant except `Empty`, `Repeat`, `Concat` and `Alternate`.
+ fn can_repeat(&self) -> bool {
+ match *self {
+ Literal{..}
+ | AnyChar
+ | AnyCharNoNL
+ | Class(_)
+ | StartLine | EndLine | StartText | EndText
+ | WordBoundary | NotWordBoundary
+ | Group{..}
+ => true,
+ _ => false,
+ }
+ }
+ /// Rewrites the tree bottom-up: fuses adjacent literals, drops redundant groups, unwraps one-item concatenations.
+ fn simplify(self) -> Expr {
+ fn combine_literals(es: &mut Vec<Expr>, e: Expr) { // appends `e`, fusing it into a trailing literal when possible
+ match (es.pop(), e) {
+ (None, e) => es.push(e),
+ (Some(Literal { chars: mut chars1, casei: casei1 }),
+ Literal { chars: chars2, casei: casei2 }) => {
+ if casei1 == casei2 { // only literals with the same case sensitivity are fused
+ chars1.extend(chars2);
+ es.push(Literal { chars: chars1, casei: casei1 });
+ } else {
+ es.push(Literal { chars: chars1, casei: casei1 });
+ es.push(Literal { chars: chars2, casei: casei2 });
+ }
+ }
+ (Some(e1), e2) => { // not two literals: put the popped item back, then append
+ es.push(e1);
+ es.push(e2);
+ }
+ }
+ }
+ match self {
+ Repeat { e, r, greedy } => Repeat {
+ e: Box::new(e.simplify()),
+ r: r,
+ greedy: greedy,
+ },
+ Group { e, i, name } => {
+ let e = e.simplify();
+ if i.is_none() && name.is_none() && e.can_repeat() { // non-capturing group around a repeatable expression: drop the group
+ e
+ } else {
+ Group { e: Box::new(e), i: i, name: name }
+ }
+ }
+ Concat(es) => {
+ let mut new_es = Vec::with_capacity(es.len());
+ for e in es {
+ combine_literals(&mut new_es, e.simplify());
+ }
+ if new_es.len() == 1 { // a concatenation of one item is just that item
+ new_es.pop().unwrap()
+ } else {
+ Concat(new_es)
+ }
+ }
+ Alternate(es) => Alternate(es.into_iter()
+ .map(|e| e.simplify())
+ .collect()),
+ e => e, // every other variant is returned untouched
+ }
+ }
+}
+
+impl Deref for CharClass { // exposes the range list read-only, so `Vec`/slice methods work on a class
+ type Target = Vec<ClassRange>;
+ fn deref(&self) -> &Vec<ClassRange> { &self.ranges }
+}
+
+impl IntoIterator for CharClass { // consuming iteration: yields the ranges by value
+ type Item = ClassRange;
+ type IntoIter = vec::IntoIter<ClassRange>;
+ fn into_iter(self) -> vec::IntoIter<ClassRange> { self.ranges.into_iter() }
+}
+
+impl<'a> IntoIterator for &'a CharClass { // borrowing iteration: yields references to the ranges
+ type Item = &'a ClassRange;
+ type IntoIter = slice::Iter<'a, ClassRange>;
+ fn into_iter(self) -> slice::Iter<'a, ClassRange> { self.iter() } // `iter` is reached through `Deref`
+}
+
+impl CharClass {
+ /// Wraps `ranges` as a case sensitive class, without sorting or merging them.
+ fn new(ranges: Vec<ClassRange>) -> CharClass {
+ CharClass { ranges: ranges, casei: false }
+ }
+
+ /// Creates a case sensitive class with no ranges.
+ fn empty() -> CharClass {
+ CharClass::new(Vec::new())
+ }
+
+ /// Returns true if `c` falls inside one of this class's ranges.
+ ///
+ /// For a case insensitive class `c` is first mapped through `simple_case_fold`.
+ /// The lookup is a binary search, so the ranges must be sorted and disjoint.
+ pub fn matches(&self, mut c: char) -> bool {
+ if self.is_case_insensitive() {
+ c = simple_case_fold(c)
+ }
+ self.binary_search_by(|range| c.partial_cmp(range).unwrap()).is_ok()
+ }
+
+ /// Returns the class's case insensitivity flag.
+ ///
+ /// The flag is set by `case_fold`. When it is set, `matches` folds its
+ /// argument before searching and `Display` wraps the class in `(?i:...)`;
+ /// `negate` asserts that it is not set.
+ pub fn is_case_insensitive(&self) -> bool {
+ self.casei
+ }
+
+ /// Creates a class with no ranges that keeps this class's `casei` flag.
+ ///
+ /// Its vector is preallocated with room for `self.len()` ranges.
+ fn to_empty(&self) -> CharClass {
+ CharClass { ranges: Vec::with_capacity(self.len()), casei: self.casei }
+ }
+
+ /// Appends the ranges of `other` and canonicalizes the result (test builds only).
+ #[cfg(test)]
+ fn merge(mut self, other: CharClass) -> CharClass {
+ self.ranges.extend(other);
+ self.canonicalize()
+ }
+
+ /// Sorts the ranges and merges those that overlap or touch.
+ ///
+ /// The result is ordered and no two of its ranges overlap or are adjacent,
+ /// which is the shape `matches` and `negate` work on.
+ fn canonicalize(mut self) -> CharClass {
+ // Sort first so that only the most recently emitted range can merge with the next.
+ self.ranges.sort();
+ let mut ordered = self.to_empty();
+ for candidate in self {
+ // `ordered` is in order, so `candidate` can only overlap its last
+ // range. `overlapping` is also true for adjacent ranges, so
+ // e.g. `a-a` followed by `b-b` collapses into `a-b`.
+ if let Some(or) = ordered.ranges.last_mut() {
+ if or.overlapping(candidate) {
+ *or = or.merge(candidate);
+ continue;
+ }
+ }
+ ordered.ranges.push(candidate);
+ }
+ ordered
+ }
+
+ /// Returns the complement of this class.
+ ///
+ /// The class is canonicalized, then the result collects the gap before
+ /// the first range, the gaps between consecutive ranges and the gap
+ /// after the last range. `inc_char` and `dec_char` step over the
+ /// surrogate code points when computing the gap boundaries.
+ ///
+ /// An empty class is returned unchanged (it does not become "everything").
+ ///
+ /// Panics if the class is case insensitive.
+ fn negate(mut self) -> CharClass {
+ fn range(s: char, e: char) -> ClassRange { ClassRange::new(s, e) }
+ // Only case sensitive classes may be negated here; the assert below
+ // panics when the `casei` flag is set.
+ assert!(!self.casei);
+
+ if self.is_empty() { return self; }
+ self = self.canonicalize();
+ let mut inv = self.to_empty();
+ if self[0].start > '\x00' { // gap in front of the first range
+ inv.ranges.push(range('\x00', dec_char(self[0].start)));
+ }
+ for win in self.windows(2) { // gap between each pair of neighbouring ranges
+ inv.ranges.push(range(inc_char(win[0].end),
+ dec_char(win[1].start)));
+ }
+ if self[self.len() - 1].end < char::MAX { // gap behind the last range
+ inv.ranges.push(range(inc_char(self[self.len() - 1].end),
+ char::MAX));
+ }
+ inv
+ }
+
+ /// Returns a case insensitive copy of this class with every range case folded.
+ ///
+ /// The result has `casei` set and is canonicalized.
+ fn case_fold(self) -> CharClass {
+ let mut folded = self.to_empty();
+ folded.casei = true;
+ for r in self {
+ // Folding a range walks over every character in it, so it is only
+ // done for ranges that contain at least one character with an entry
+ // in the case folding table; any other range is copied over
+ // unchanged.
+ if r.needs_case_folding() {
+ folded.ranges.extend(r.case_fold());
+ } else {
+ folded.ranges.push(r);
+ }
+ }
+ folded.canonicalize()
+ }
+}
+
+impl ClassRange {
+ /// Creates a range covering `start` through `end`, inclusive.
+ ///
+ /// If `start > end` the two are swapped, so the stored `start` is never
+ /// greater than the stored `end`.
+ fn new(start: char, end: char) -> ClassRange {
+ if start <= end {
+ ClassRange { start: start, end: end }
+ } else {
+ ClassRange { start: end, end: start }
+ }
+ }
+
+ /// Creates a range holding exactly the character `c`.
+ fn one(c: char) -> ClassRange {
+ ClassRange { start: c, end: c }
+ }
+
+ /// Returns true if the two ranges share a character or sit right next to each
+ /// other (one ends immediately before the other starts, per `inc_char`).
+ fn overlapping(self, other: ClassRange) -> bool {
+ max(self.start, other.start) <= inc_char(min(self.end, other.end))
+ }
+
+ /// Returns the smallest range covering both `self` and `other`.
+ fn merge(self, other: ClassRange) -> ClassRange {
+ ClassRange {
+ start: min(self.start, other.start),
+ end: max(self.end, other.end),
+ }
+ }
+
+ /// Returns true if at least one character in this range has an entry in
+ /// `case_folding::C_plus_S_table`.
+ fn needs_case_folding(self) -> bool {
+ case_folding::C_plus_S_table
+ .binary_search_by(|&(c, _)| self.partial_cmp(&c).unwrap()).is_ok()
+ }
+
+ /// Case folds every character in this range and returns the results as ranges.
+ ///
+ /// Characters with an entry in the folding table are replaced by their
+ /// folded form, all others are kept. Results that follow one another
+ /// consecutively (per `inc_char`) are grouped into a single range.
+ fn case_fold(self) -> Vec<ClassRange> {
+ let (s, e) = (self.start as u32, self.end as u32 + 1); // `e` is one past the end, for the half-open loop below
+ let mut start = simple_case_fold(self.start);
+ let mut end = start;
+ let mut next_case_fold = self.start; // table lookups are skipped for characters below this
+ let mut ranges = Vec::with_capacity(100);
+ for mut c in (s+1..e).filter_map(char::from_u32) { // `filter_map` drops values that are not valid `char`s
+ if c >= next_case_fold {
+ c = match simple_case_fold_result(c) {
+ Ok(i) => case_folding::C_plus_S_table[i].1,
+ Err(i) => { // no entry for `c`; `i` is where the next entry sits in the table
+ if i < case_folding::C_plus_S_table.len() {
+ next_case_fold = case_folding::C_plus_S_table[i].0;
+ } else {
+ next_case_fold = '\u{10FFFF}'
+ }
+ c
+ }
+ };
+ }
+ if c != inc_char(end) { // the current run is broken: emit it and start a new one at `c`
+ ranges.push(ClassRange::new(start, end));
+ start = c;
+ }
+ end = c;
+ }
+ ranges.push(ClassRange::new(start, end)); // emit the final run
+ ranges
+ }
+}
+
+impl PartialEq<char> for ClassRange { // a range "equals" a character when it contains it
+ #[inline]
+ fn eq(&self, other: &char) -> bool {
+ self.start <= *other && *other <= self.end
+ }
+}
+
+impl PartialEq<ClassRange> for char { // mirror image: is this character inside `other`?
+ #[inline]
+ fn eq(&self, other: &ClassRange) -> bool {
+ other.eq(self)
+ }
+}
+
+impl PartialOrd<char> for ClassRange { // NOTE: the result says where the character sits relative to the range
+ #[inline]
+ fn partial_cmp(&self, other: &char) -> Option<Ordering> { // always returns `Some`
+ Some(if self == other { // the character is inside the range
+ Ordering::Equal
+ } else if *other > self.end { // the character lies past the end of the range
+ Ordering::Greater
+ } else { // otherwise the character lies before the start of the range
+ Ordering::Less
+ })
+ }
+}
+
+impl PartialOrd<ClassRange> for char { // reverse of the impl on `ClassRange`: where the range sits relative to the character
+ #[inline]
+ fn partial_cmp(&self, other: &ClassRange) -> Option<Ordering> {
+ other.partial_cmp(self).map(|o| o.reverse())
+ }
+}
+
+
+
+impl fmt::Display for Expr { // writes the expression back out in regex syntax
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ match *self {
+ Empty => write!(f, ""),
+ Literal { ref chars, casei } => {
+ if casei { try!(write!(f, "(?i:")); }
+ for &c in chars {
+ try!(write!(f, "{}", quote_char(c))); // punctuation is backslash-escaped so it stays literal
+ }
+ if casei { try!(write!(f, ")")); }
+ Ok(())
+ }
+ AnyChar => write!(f, "(?s:.)"),
+ AnyCharNoNL => write!(f, "."),
+ Class(ref cls) => write!(f, "{}", cls),
+ StartLine => write!(f, "(?m:^)"),
+ EndLine => write!(f, "(?m:$)"),
+ StartText => write!(f, r"^"),
+ EndText => write!(f, r"$"),
+ WordBoundary => write!(f, r"\b"),
+ NotWordBoundary => write!(f, r"\B"),
+ Group { ref e, i: None, name: None } => write!(f, "(?:{})", e),
+ Group { ref e, name: None, .. } => write!(f, "({})", e), // reached only when `i` is `Some`
+ Group { ref e, name: Some(ref n), .. } => {
+ write!(f, "(?P<{}>{})", n, e)
+ }
+ Repeat { ref e, r, greedy } => {
+ match &**e {
+ &Literal { ref chars, .. } if chars.len() > 1 => { // group it so the operator covers the whole literal
+ try!(write!(f, "(?:{}){}", e, r))
+ }
+ _ => try!(write!(f, "{}{}", e, r)),
+ }
+ if !greedy { try!(write!(f, "?")); }
+ Ok(())
+ }
+ Concat(ref es) => {
+ for e in es {
+ try!(write!(f, "{}", e));
+ }
+ Ok(())
+ }
+ Alternate(ref es) => {
+ for (i, e) in es.iter().enumerate() {
+ if i > 0 { try!(write!(f, "|")); } // separator goes between alternatives only
+ try!(write!(f, "{}", e));
+ }
+ Ok(())
+ }
+ }
+ }
+}
+
+impl fmt::Display for Repeater { // writes the operator in regex syntax; `{{` and `}}` are escaped literal braces
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ match *self {
+ ZeroOrOne => write!(f, "?"),
+ ZeroOrMore => write!(f, "*"),
+ OneOrMore => write!(f, "+"),
+ Range { min: s, max: None } => write!(f, "{{{},}}", s), // open-ended: `{min,}`
+ Range { min: s, max: Some(e) } if s == e => write!(f, "{{{}}}", s), // exact count: `{n}`
+ Range { min: s, max: Some(e) } => write!(f, "{{{}, {}}}", s, e), // bounded: `{min, max}`
+ }
+ }
+}
+
+impl fmt::Display for CharClass { // writes `[...]` holding every range, wrapped in `(?i:...)` when case insensitive
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ if self.casei {
+ try!(write!(f, "(?i:"));
+ }
+ try!(write!(f, "["));
+ for range in self.iter() {
+ try!(write!(f, "{}", range));
+ }
+ try!(write!(f, "]"));
+ if self.casei {
+ try!(write!(f, ")"));
+ }
+ Ok(())
+ }
+}
+
+impl fmt::Display for ClassRange { // always writes `start-end`, even when both ends are the same character
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "{}-{}", quote_char(self.start), quote_char(self.end))
+ }
+}
+
+/// Result alias whose error type is this crate's `Error`.
+pub type Result<T> = ::std::result::Result<T, Error>;
+
+/// A parse error.
+///
+/// Carries the error kind, a position and a snippet of surrounding text; the
+/// `Display` impl combines all three into one message.
+#[derive(Clone, Debug, PartialEq)]
+pub struct Error {
+ pos: usize, // reported by `Display` as a character offset
+ surround: String, // the text `Display` reports the error as being "near"
+ kind: ErrorKind, // what went wrong
+}
+
+/// The specific kind of parse error.
+#[derive(Clone, Debug, PartialEq)]
+pub enum ErrorKind {
+ /// More than one negation symbol was used in a set of flags.
+ /// Only one is allowed.
+ DoubleFlagNegation,
+ /// A capture name was used more than once.
+ /// The payload is the duplicated name.
+ DuplicateCaptureName(String),
+ /// An alternation was empty.
+ EmptyAlternate,
+ /// A capture name was empty.
+ EmptyCaptureName,
+ /// A flag negation was given without any flags to negate.
+ EmptyFlagNegation,
+ /// An empty group, e.g. `()`.
+ EmptyGroup,
+ /// The payload is not a valid base 10 number.
+ InvalidBase10(String),
+ /// The payload is not a valid base 16 number.
+ ///
+ InvalidBase16(String),
+ /// An invalid capture name; names consist of `[_a-zA-Z0-9]` and must not start with a number.
+ InvalidCaptureName(String),
+ /// A character class range whose start is greater than its end.
+ ///
+ InvalidClassRange {
+ /// The start of the offending range.
+ start: char,
+ /// The end of the offending range.
+ end: char,
+ },
+ /// An escape sequence that is not valid inside a character class.
+ /// The payload is the expression the escape stands for.
+ InvalidClassEscape(Expr),
+ /// A counted repetition whose minimum is greater than its maximum.
+ InvalidRepeatRange {
+ /// The minimum of the offending range.
+ min: u32,
+ /// The maximum of the offending range.
+ max: u32,
+ },
+ /// A number that does not correspond to a Unicode scalar value.
+ /// The payload is that number.
+ InvalidScalarValue(u32),
+ /// A count is missing in a counted repetition operator.
+ MissingBase10,
+ /// A repetition operator has no expression to apply to.
+ RepeaterExpectsExpr,
+ /// A repetition operator was applied to an expression that cannot be
+ /// repeated. The payload is that expression.
+ RepeaterUnexpectedExpr(Expr),
+ /// A capture group name is not closed (missing a `>`).
+ UnclosedCaptureName(String),
+ /// A hexadecimal literal is not closed (missing a `}`).
+ UnclosedHex,
+ /// A parenthesis is not closed.
+ UnclosedParen,
+ /// A counted repetition is not closed (missing a `}`).
+ UnclosedRepeat,
+ /// A Unicode class literal is not closed (missing a `}`).
+ UnclosedUnicodeName,
+ /// The regex ended inside a character class (missing a `]`).
+ UnexpectedClassEof,
+ /// The regex ended inside an escape sequence.
+ UnexpectedEscapeEof,
+ /// The regex ended inside inline flags (missing a `)` or `:`).
+ UnexpectedFlagEof,
+ /// The regex ended inside a two digit hexadecimal literal.
+ UnexpectedTwoDigitHexEof,
+ /// A closing parenthesis has no matching opening parenthesis.
+ UnopenedParen,
+ /// An unrecognized escape sequence; the payload is the escaped character.
+ UnrecognizedEscape(char),
+ /// An unrecognized flag; the allowed flags are `i`, `s`, `m`, `U` and `x`.
+ UnrecognizedFlag(char),
+ /// An unrecognized Unicode class name.
+ UnrecognizedUnicodeClass(String),
+
+ // Hidden from the documentation. Both `description` and the `Display`
+ // impl hit `unreachable!()` if they are handed this variant.
+ // NOTE(review): presumably present so that matches outside this crate
+ // need a wildcard arm; confirm.
+ #[doc(hidden)]
+ __Nonexhaustive,
+}
+
+impl Error {
+ /// Returns the position stored for this error.
+ ///
+ /// The `Display` impl reports this value as a character offset into the
+ /// regex. NOTE(review): whether it counts characters or bytes is decided
+ /// by the parser, which is not visible here.
+ pub fn position(&self) -> usize {
+ self.pos
+ }
+
+ /// Returns the kind of this error.
+ pub fn kind(&self) -> &ErrorKind {
+ &self.kind
+ }
+}
+
+impl ErrorKind {
+ fn description(&self) -> &str { // short fixed description per kind; payloads are not included
+ use ErrorKind::*;
+ match *self {
+ DoubleFlagNegation => "double flag negation",
+ DuplicateCaptureName(_) => "duplicate capture name",
+ EmptyAlternate => "empty alternate",
+ EmptyCaptureName => "empty capture name",
+ EmptyFlagNegation => "flag negation without any flags",
+ EmptyGroup => "empty group (e.g., '()')",
+ InvalidBase10(_) => "invalid base 10 number",
+ InvalidBase16(_) => "invalid base 16 number",
+ InvalidCaptureName(_) => "invalid capture name",
+ InvalidClassRange{..} => "invalid character class range",
+ InvalidClassEscape(_) => "invalid escape sequence in class",
+ InvalidRepeatRange{..} => "invalid counted repetition range",
+ InvalidScalarValue(_) => "invalid Unicode scalar value",
+ MissingBase10 => "missing count in repetition operator",
+ RepeaterExpectsExpr => "repetition operator missing expression",
+ RepeaterUnexpectedExpr(_) => "expression cannot be repeated",
+ UnclosedCaptureName(_) => "unclosed capture group name",
+ UnclosedHex => "unclosed hexadecimal literal",
+ UnclosedParen => "unclosed parenthesis",
+ UnclosedRepeat => "unclosed counted repetition operator",
+ UnclosedUnicodeName => "unclosed Unicode class literal",
+ UnexpectedClassEof => "unexpected EOF in character class",
+ UnexpectedEscapeEof => "unexpected EOF in escape sequence",
+ UnexpectedFlagEof => "unexpected EOF in flags",
+ UnexpectedTwoDigitHexEof => "unexpected EOF in hex literal",
+ UnopenedParen => "unopened parenthesis",
+ UnrecognizedEscape(_) => "unrecognized escape sequence",
+ UnrecognizedFlag(_) => "unrecognized flag",
+ UnrecognizedUnicodeClass(_) => "unrecognized Unicode class name",
+ __Nonexhaustive => unreachable!(), // panics if the hidden variant is ever passed in
+ }
+ }
+}
+
+impl ::std::error::Error for Error { // the description is the one of the underlying `ErrorKind`
+ fn description(&self) -> &str {
+ self.kind.description()
+ }
+}
+
+impl fmt::Display for Error { // one-line message: surrounding text, position, then the `ErrorKind` message
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "Error parsing regex near '{}' at character offset {}: {}",
+ self.surround, self.pos, self.kind)
+ }
+}
+
+impl fmt::Display for ErrorKind {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ use ErrorKind::*;
+ match *self {
+ DoubleFlagNegation =>
+ write!(f, "Only one negation symbol is allowed in flags."),
+ DuplicateCaptureName(ref s) =>
+ write!(f, "Capture name '{}' is used more than once.", s),
+ EmptyAlternate =>
+ write!(f, "Alternations cannot be empty."),
+ EmptyCaptureName =>
+ write!(f, "Capture names cannot be empty."),
+ EmptyFlagNegation =>
+ write!(f, "Flag negation requires setting at least one flag."),
+ EmptyGroup =>
+ write!(f, "Empty regex groups (e.g., '()') are not allowed."),
+ InvalidBase10(ref s) =>
+ write!(f, "Not a valid base 10 number: '{}'", s),
+ InvalidBase16(ref s) =>
+ write!(f, "Not a valid base 16 number: '{}'", s),
+ InvalidCaptureName(ref s) =>
+ write!(f, "Invalid capture name: '{}'. Capture names must \
+ consist of [_a-zA-Z0-9] and are not allowed to \
+ start with with a number.", s),
+ InvalidClassRange { start, end } =>
+ write!(f, "Invalid character class range '{}-{}'. \
+ Character class ranges must start with the smaller \
+ character, but {} > {}", start, end, start, end),
+ InvalidClassEscape(ref e) =>
+ write!(f, "Invalid escape sequence in character \
+ class: '{}'.", e),
+ InvalidRepeatRange { min, max } =>
+ write!(f, "Invalid counted repetition range: {{{}, {}}}. \
+ Counted repetition ranges must start with the \
+ minimum, but {} > {}", min, max, min, max),
+ InvalidScalarValue(c) =>
+ write!(f, "Number does not correspond to a Unicode scalar \
+ value: '{}'.", c),
+ MissingBase10 =>
+ write!(f, "Missing maximum in counted reptition operator."),
+ RepeaterExpectsExpr =>
+ write!(f, "Missing expression for reptition operator."),
+ RepeaterUnexpectedExpr(ref e) =>
+ write!(f, "Invalid application of reptition operator to: \
+ '{}'.", e),
+ UnclosedCaptureName(ref s) =>
+ write!(f, "Capture name group for '{}' is not closed. \
+ (Missing a '>'.)", s),
+ UnclosedHex =>
+ write!(f, "Unclosed hexadecimal literal (missing a '}}')."),
+ UnclosedParen =>
+ write!(f, "Unclosed parenthesis."),
+ UnclosedRepeat =>
+ write!(f, "Unclosed counted repetition (missing a '}}')."),
+ UnclosedUnicodeName =>
+ write!(f, "Unclosed Unicode literal (missing a '}}')."),
+ UnexpectedClassEof =>
+ write!(f, "Character class was not closed before the end of \
+ the regex (missing a ']')."),
+ UnexpectedEscapeEof =>
+ write!(f, "Started an escape sequence that didn't finish \
+ before the end of the regex."),
+ UnexpectedFlagEof =>
+ write!(f, "Inline flag settings was not closed before the end \
+ of the regex (missing a ')' or ':')."),
+ UnexpectedTwoDigitHexEof =>
+ write!(f, "Unexpected end of two digit hexadecimal literal."),
+ UnopenedParen =>
+ write!(f, "Unopened parenthesis."),
+ UnrecognizedEscape(c) =>
+ write!(f, "Unrecognized escape sequence: '\\{}'.", c),
+ UnrecognizedFlag(c) =>
+ write!(f, "Unrecognized flag: '{}'. \
+ (Allowed flags: i, s, m, U, x.)", c),
+ UnrecognizedUnicodeClass(ref s) =>
+ write!(f, "Unrecognized Unicode class name: '{}'.", s),
+ __Nonexhaustive => unreachable!(),
+ }
+ }
+}
+
+
+
+
+
+
+
+/// Returns the simple case folding of `c`.
+///
+/// A character with an entry in `case_folding::C_plus_S_table` maps to that
+/// entry's second field; every other character is returned unchanged.
+#[doc(hidden)]
+pub fn simple_case_fold(c: char) -> char {
+    match simple_case_fold_result(c) {
+        Ok(idx) => case_folding::C_plus_S_table[idx].1,
+        Err(_) => c,
+    }
+}
+
+/// Looks `c` up in `case_folding::C_plus_S_table` by binary search.
+///
+/// `Ok(i)` is the index of the entry for `c`; `Err(i)` means there is no
+/// entry and `i` is the index where one would be inserted.
+fn simple_case_fold_result(c: char) -> ::std::result::Result<usize, usize> {
+ case_folding::C_plus_S_table.binary_search_by(|&(x, _)| x.cmp(&c))
+}
+
+
+
+
+
+/// Returns a copy of `text` in which every character that
+/// `parser::is_punct` accepts is preceded by a backslash.
+pub fn quote(text: &str) -> String {
+    text.chars().fold(String::with_capacity(text.len()), |mut escaped, ch| {
+        if parser::is_punct(ch) {
+            escaped.push('\\');
+        }
+        escaped.push(ch);
+        escaped
+    })
+}
+
+// Returns `c` as a string, preceded by a backslash when `parser::is_punct`
+// accepts it.
+fn quote_char(c: char) -> String {
+    let needs_escape = parser::is_punct(c);
+    let mut out = String::new();
+    if needs_escape {
+        out.push('\\');
+    }
+    out.push(c);
+    out
+}
+
+// Returns the character that follows `c`.
+//
+// `char::MAX` has no successor and is returned as is. U+D7FF maps to U+E000,
+// stepping over the surrogate code points, which are not valid `char`s.
+fn inc_char(c: char) -> char {
+    if c == char::MAX {
+        return c;
+    }
+    if c == '\u{D7FF}' {
+        return '\u{E000}';
+    }
+    char::from_u32(c as u32 + 1).unwrap()
+}
+
+// Returns the character that precedes `c`.
+//
+// `'\x00'` has no predecessor and is returned as is. U+E000 maps to U+D7FF,
+// stepping over the surrogate code points, which are not valid `char`s.
+fn dec_char(c: char) -> char {
+    if c == '\x00' {
+        return c;
+    }
+    if c == '\u{E000}' {
+        return '\u{D7FF}';
+    }
+    char::from_u32(c as u32 - 1).unwrap()
+}
+
+
+/// Returns true if `c` is a word character.
+///
+/// `_`, ASCII digits and ASCII letters are accepted directly; any other
+/// character is looked up in the `unicode::regex::PERLW` range table.
+#[doc(hidden)]
+pub fn is_word_char(c: char) -> bool {
+    let ascii_word = c == '_'
+        || ('0' <= c && c <= '9')
+        || ('a' <= c && c <= 'z')
+        || ('A' <= c && c <= 'Z');
+    if ascii_word {
+        return true;
+    }
+    // Each table entry is an inclusive `(start, end)` range; the closure
+    // tells the search where that range sits relative to `c`.
+    ::unicode::regex::PERLW.binary_search_by(|&(lo, hi)| {
+        if c < lo {
+            Ordering::Greater
+        } else if c > hi {
+            Ordering::Less
+        } else {
+            Ordering::Equal
+        }
+    }).is_ok()
+}
+
+#[cfg(test)]
+mod properties;
+
+#[cfg(test)]
+mod tests { // unit tests for `CharClass` canonicalization, negation and case folding
+ use {CharClass, ClassRange};
+
+ fn class(ranges: &[(char, char)]) -> CharClass { // builds a case sensitive class from `(start, end)` pairs, as given
+ let ranges = ranges.iter().cloned()
+ .map(|(c1, c2)| ClassRange::new(c1, c2)).collect();
+ CharClass::new(ranges)
+ }
+
+ fn classi(ranges: &[(char, char)]) -> CharClass { // same as `class`, but with the `casei` flag set
+ let mut cls = class(ranges);
+ cls.casei = true;
+ cls
+ }
+
+ #[test]
+ fn class_canon_no_change() {
+ let cls = class(&[('a', 'c'), ('x', 'z')]);
+ assert_eq!(cls.clone().canonicalize(), cls);
+ }
+
+ #[test]
+ fn class_canon_unordered() {
+ let cls = class(&[('x', 'z'), ('a', 'c')]);
+ assert_eq!(cls.canonicalize(), class(&[
+ ('a', 'c'), ('x', 'z'),
+ ]));
+ }
+
+ #[test]
+ fn class_canon_overlap() {
+ let cls = class(&[('x', 'z'), ('w', 'y')]);
+ assert_eq!(cls.canonicalize(), class(&[
+ ('w', 'z'),
+ ]));
+ }
+
+ #[test]
+ fn class_canon_overlap_many() {
+ let cls = class(&[
+ ('c', 'f'), ('a', 'g'), ('d', 'j'), ('a', 'c'),
+ ('m', 'p'), ('l', 's'),
+ ]);
+ assert_eq!(cls.clone().canonicalize(), class(&[
+ ('a', 'j'), ('l', 's'),
+ ]));
+ }
+
+ #[test]
+ fn class_canon_overlap_many_case_fold() {
+ let cls = class(&[
+ ('C', 'F'), ('A', 'G'), ('D', 'J'), ('A', 'C'),
+ ('M', 'P'), ('L', 'S'), ('c', 'f'),
+ ]);
+ assert_eq!(cls.case_fold(), classi(&[
+ ('a', 'j'), ('l', 's'),
+ ]));
+ }
+
+ #[test]
+ fn class_canon_overlap_boundary() { // adjacent ranges (`u-w`, `x-z`) are merged as well
+ let cls = class(&[('x', 'z'), ('u', 'w')]);
+ assert_eq!(cls.canonicalize(), class(&[
+ ('u', 'z'),
+ ]));
+ }
+
+ #[test]
+ fn class_canon_extreme_edge_case() {
+ let cls = class(&[('\x00', '\u{10FFFF}'), ('\x00', '\u{10FFFF}')]);
+ assert_eq!(cls.canonicalize(), class(&[
+ ('\x00', '\u{10FFFF}'),
+ ]));
+ }
+
+ #[test]
+ fn class_canon_singles() {
+ let cls = class(&[('a', 'a'), ('b', 'b')]);
+ assert_eq!(cls.canonicalize(), class(&[('a', 'b')]));
+ }
+
+ #[test]
+ fn class_negate_single() {
+ let cls = class(&[('a', 'a')]);
+ assert_eq!(cls.negate(), class(&[
+ ('\x00', '\x60'), ('\x62', '\u{10FFFF}'),
+ ]));
+ }
+
+ #[test]
+ fn class_negate_singles() {
+ let cls = class(&[('a', 'a'), ('b', 'b')]);
+ assert_eq!(cls.negate(), class(&[
+ ('\x00', '\x60'), ('\x63', '\u{10FFFF}'),
+ ]));
+ }
+
+ #[test]
+ fn class_negate_multiples() {
+ let cls = class(&[('a', 'c'), ('x', 'z')]);
+ assert_eq!(cls.negate(), class(&[
+ ('\x00', '\x60'), ('\x64', '\x77'), ('\x7b', '\u{10FFFF}'),
+ ]));
+ }
+
+ #[test]
+ fn class_negate_min_scalar() {
+ let cls = class(&[('\x00', 'a')]);
+ assert_eq!(cls.negate(), class(&[
+ ('\x62', '\u{10FFFF}'),
+ ]));
+ }
+
+ #[test]
+ fn class_negate_max_scalar() {
+ let cls = class(&[('a', '\u{10FFFF}')]);
+ assert_eq!(cls.negate(), class(&[
+ ('\x00', '\x60'),
+ ]));
+ }
+
+ #[test]
+ fn class_negate_everything() {
+ let cls = class(&[('\x00', '\u{10FFFF}')]);
+ assert_eq!(cls.negate(), class(&[]));
+ }
+
+ #[test]
+ fn class_negate_everything_sans_one() {
+ let cls = class(&[
+ ('\x00', '\u{10FFFD}'), ('\u{10FFFF}', '\u{10FFFF}')
+ ]);
+ assert_eq!(cls.negate(), class(&[
+ ('\u{10FFFE}', '\u{10FFFE}'),
+ ]));
+ }
+
+ #[test]
+ fn class_negate_surrogates_min() { // the complement must start at U+E000, past the surrogate gap
+ let cls = class(&[('\x00', '\u{D7FF}')]);
+ assert_eq!(cls.negate(), class(&[
+ ('\u{E000}', '\u{10FFFF}'),
+ ]));
+ }
+
+ #[test]
+ fn class_negate_surrogates_min_edge() {
+ let cls = class(&[('\x00', '\u{D7FE}')]);
+ assert_eq!(cls.negate(), class(&[
+ ('\u{D7FF}', '\u{10FFFF}'),
+ ]));
+ }
+
+ #[test]
+ fn class_negate_surrogates_max() { // the complement must end at U+D7FF, before the surrogate gap
+ let cls = class(&[('\u{E000}', '\u{10FFFF}')]);
+ assert_eq!(cls.negate(), class(&[
+ ('\x00', '\u{D7FF}'),
+ ]));
+ }
+
+ #[test]
+ fn class_negate_surrogates_max_edge() {
+ let cls = class(&[('\u{E001}', '\u{10FFFF}')]);
+ assert_eq!(cls.negate(), class(&[
+ ('\x00', '\u{E000}'),
+ ]));
+ }
+
+ #[test]
+ fn class_fold_retain_only_needed() {
+ let cls = class(&[('A', 'Z'), ('a', 'z')]);
+ assert_eq!(cls.case_fold(), classi(&[
+ ('a', 'z'),
+ ]));
+ }
+
+ #[test]
+ fn class_fold_az() {
+ let cls = class(&[('A', 'Z')]);
+ assert_eq!(cls.case_fold(), classi(&[
+ ('a', 'z'),
+ ]));
+ }
+
+ #[test]
+ fn class_fold_a_underscore() {
+ let cls = class(&[('A', 'A'), ('_', '_')]);
+ assert_eq!(cls.clone().canonicalize(), class(&[
+ ('A', 'A'), ('_', '_'),
+ ]));
+ assert_eq!(cls.case_fold(), classi(&[
+ ('_', '_'), ('a', 'a'),
+ ]));
+ }
+
+ #[test]
+ fn class_fold_a_equals() {
+ let cls = class(&[('A', 'A'), ('=', '=')]);
+ assert_eq!(cls.clone().canonicalize(), class(&[
+ ('=', '='), ('A', 'A'),
+ ]));
+ assert_eq!(cls.case_fold(), classi(&[
+ ('=', '='), ('a', 'a'),
+ ]));
+ }
+
+ #[test]
+ fn class_fold_no_folding_needed() {
+ let cls = class(&[('\x00', '\x10')]);
+ assert_eq!(cls.case_fold(), classi(&[
+ ('\x00', '\x10'),
+ ]));
+ }
+}
+
+
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ 7
+ 8
+ 9
+ 10
+ 11
+ 12
+ 13
+ 14
+ 15
+ 16
+ 17
+ 18
+ 19
+ 20
+ 21
+ 22
+ 23
+ 24
+ 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+ 100
+ 101
+ 102
+ 103
+ 104
+ 105
+ 106
+ 107
+ 108
+ 109
+ 110
+ 111
+ 112
+ 113
+ 114
+ 115
+ 116
+ 117
+ 118
+ 119
+ 120
+ 121
+ 122
+ 123
+ 124
+ 125
+ 126
+ 127
+ 128
+ 129
+ 130
+ 131
+ 132
+ 133
+ 134
+ 135
+ 136
+ 137
+ 138
+ 139
+ 140
+ 141
+ 142
+ 143
+ 144
+ 145
+ 146
+ 147
+ 148
+ 149
+ 150
+ 151
+ 152
+ 153
+ 154
+ 155
+ 156
+ 157
+ 158
+ 159
+ 160
+ 161
+ 162
+ 163
+ 164
+ 165
+ 166
+ 167
+ 168
+ 169
+ 170
+ 171
+ 172
+ 173
+ 174
+ 175
+ 176
+ 177
+ 178
+ 179
+ 180
+ 181
+ 182
+ 183
+ 184
+ 185
+ 186
+ 187
+ 188
+ 189
+ 190
+ 191
+ 192
+ 193
+ 194
+ 195
+ 196
+ 197
+ 198
+ 199
+ 200
+ 201
+ 202
+ 203
+ 204
+ 205
+ 206
+ 207
+ 208
+ 209
+ 210
+ 211
+ 212
+ 213
+ 214
+ 215
+ 216
+ 217
+ 218
+ 219
+ 220
+ 221
+ 222
+ 223
+ 224
+ 225
+ 226
+ 227
+ 228
+ 229
+ 230
+ 231
+ 232
+ 233
+ 234
+ 235
+ 236
+ 237
+ 238
+ 239
+ 240
+ 241
+ 242
+ 243
+ 244
+ 245
+ 246
+ 247
+ 248
+ 249
+ 250
+ 251
+ 252
+ 253
+ 254
+ 255
+ 256
+ 257
+ 258
+ 259
+ 260
+ 261
+ 262
+ 263
+ 264
+ 265
+ 266
+ 267
+ 268
+ 269
+ 270
+ 271
+ 272
+ 273
+ 274
+ 275
+ 276
+ 277
+ 278
+ 279
+ 280
+ 281
+ 282
+ 283
+ 284
+ 285
+ 286
+ 287
+ 288
+ 289
+ 290
+ 291
+ 292
+ 293
+ 294
+ 295
+ 296
+ 297
+ 298
+ 299
+ 300
+ 301
+ 302
+ 303
+ 304
+ 305
+ 306
+ 307
+ 308
+ 309
+ 310
+ 311
+ 312
+ 313
+ 314
+ 315
+ 316
+ 317
+ 318
+ 319
+ 320
+ 321
+ 322
+ 323
+ 324
+ 325
+ 326
+ 327
+ 328
+ 329
+ 330
+ 331
+ 332
+ 333
+ 334
+ 335
+ 336
+ 337
+ 338
+ 339
+ 340
+ 341
+ 342
+ 343
+ 344
+ 345
+ 346
+ 347
+ 348
+ 349
+ 350
+ 351
+ 352
+ 353
+ 354
+ 355
+ 356
+ 357
+ 358
+ 359
+ 360
+ 361
+ 362
+ 363
+ 364
+ 365
+ 366
+ 367
+ 368
+ 369
+ 370
+ 371
+ 372
+ 373
+ 374
+ 375
+ 376
+ 377
+ 378
+ 379
+ 380
+ 381
+ 382
+ 383
+ 384
+ 385
+ 386
+ 387
+ 388
+ 389
+ 390
+ 391
+ 392
+ 393
+ 394
+ 395
+ 396
+ 397
+ 398
+ 399
+ 400
+ 401
+ 402
+ 403
+ 404
+ 405
+ 406
+ 407
+ 408
+ 409
+ 410
+ 411
+ 412
+ 413
+ 414
+ 415
+ 416
+ 417
+ 418
+ 419
+ 420
+ 421
+ 422
+ 423
+ 424
+ 425
+ 426
+ 427
+ 428
+ 429
+ 430
+ 431
+ 432
+ 433
+ 434
+ 435
+ 436
+ 437
+ 438
+ 439
+ 440
+ 441
+ 442
+ 443
+ 444
+ 445
+ 446
+ 447
+ 448
+ 449
+ 450
+ 451
+ 452
+ 453
+ 454
+ 455
+ 456
+ 457
+ 458
+ 459
+ 460
+ 461
+ 462
+ 463
+ 464
+ 465
+ 466
+ 467
+ 468
+ 469
+ 470
+ 471
+ 472
+ 473
+ 474
+ 475
+ 476
+ 477
+ 478
+ 479
+ 480
+ 481
+ 482
+ 483
+ 484
+ 485
+ 486
+ 487
+ 488
+ 489
+ 490
+ 491
+ 492
+ 493
+ 494
+ 495
+ 496
+ 497
+ 498
+ 499
+ 500
+ 501
+ 502
+ 503
+ 504
+ 505
+ 506
+ 507
+ 508
+ 509
+ 510
+ 511
+ 512
+ 513
+ 514
+ 515
+ 516
+ 517
+ 518
+ 519
+ 520
+ 521
+ 522
+ 523
+ 524
+ 525
+ 526
+ 527
+ 528
+ 529
+ 530
+ 531
+ 532
+ 533
+ 534
+ 535
+ 536
+ 537
+ 538
+ 539
+ 540
+ 541
+ 542
+ 543
+ 544
+ 545
+ 546
+ 547
+ 548
+ 549
+ 550
+ 551
+ 552
+ 553
+ 554
+ 555
+ 556
+ 557
+ 558
+ 559
+ 560
+ 561
+ 562
+ 563
+ 564
+ 565
+ 566
+ 567
+ 568
+ 569
+ 570
+ 571
+ 572
+ 573
+ 574
+ 575
+ 576
+ 577
+ 578
+ 579
+ 580
+ 581
+ 582
+ 583
+ 584
+ 585
+ 586
+ 587
+ 588
+ 589
+ 590
+ 591
+ 592
+ 593
+ 594
+ 595
+ 596
+ 597
+ 598
+ 599
+ 600
+ 601
+ 602
+ 603
+ 604
+ 605
+ 606
+ 607
+ 608
+ 609
+ 610
+ 611
+ 612
+ 613
+ 614
+ 615
+ 616
+ 617
+ 618
+ 619
+ 620
+ 621
+ 622
+ 623
+ 624
+ 625
+ 626
+ 627
+ 628
+ 629
+ 630
+ 631
+ 632
+ 633
+ 634
+ 635
+ 636
+ 637
+ 638
+ 639
+ 640
+ 641
+ 642
+ 643
+ 644
+ 645
+ 646
+ 647
+ 648
+ 649
+ 650
+ 651
+ 652
+ 653
+ 654
+ 655
+ 656
+ 657
+ 658
+ 659
+ 660
+ 661
+ 662
+ 663
+ 664
+ 665
+ 666
+ 667
+ 668
+ 669
+ 670
+ 671
+ 672
+ 673
+ 674
+ 675
+ 676
+ 677
+ 678
+ 679
+ 680
+ 681
+ 682
+ 683
+ 684
+ 685
+ 686
+ 687
+ 688
+ 689
+ 690
+ 691
+ 692
+ 693
+ 694
+ 695
+ 696
+ 697
+ 698
+ 699
+ 700
+ 701
+ 702
+ 703
+ 704
+ 705
+ 706
+ 707
+ 708
+ 709
+ 710
+ 711
+ 712
+ 713
+ 714
+ 715
+ 716
+ 717
+ 718
+ 719
+ 720
+ 721
+ 722
+ 723
+ 724
+ 725
+ 726
+ 727
+ 728
+ 729
+ 730
+ 731
+ 732
+ 733
+ 734
+ 735
+ 736
+ 737
+ 738
+ 739
+ 740
+ 741
+ 742
+ 743
+ 744
+ 745
+ 746
+ 747
+ 748
+ 749
+ 750
+ 751
+ 752
+ 753
+ 754
+ 755
+ 756
+ 757
+ 758
+ 759
+ 760
+ 761
+ 762
+ 763
+ 764
+ 765
+ 766
+ 767
+ 768
+ 769
+ 770
+ 771
+ 772
+ 773
+ 774
+ 775
+ 776
+ 777
+ 778
+ 779
+ 780
+ 781
+ 782
+ 783
+ 784
+ 785
+ 786
+ 787
+ 788
+ 789
+ 790
+ 791
+ 792
+ 793
+ 794
+ 795
+ 796
+ 797
+ 798
+ 799
+ 800
+ 801
+ 802
+ 803
+ 804
+ 805
+ 806
+ 807
+ 808
+ 809
+ 810
+ 811
+ 812
+ 813
+ 814
+ 815
+ 816
+ 817
+ 818
+ 819
+ 820
+ 821
+ 822
+ 823
+ 824
+ 825
+ 826
+ 827
+ 828
+ 829
+ 830
+ 831
+ 832
+ 833
+ 834
+ 835
+ 836
+ 837
+ 838
+ 839
+ 840
+ 841
+ 842
+ 843
+ 844
+ 845
+ 846
+ 847
+ 848
+ 849
+ 850
+ 851
+ 852
+ 853
+ 854
+ 855
+ 856
+ 857
+ 858
+ 859
+ 860
+ 861
+ 862
+ 863
+ 864
+ 865
+ 866
+ 867
+ 868
+ 869
+ 870
+ 871
+ 872
+ 873
+ 874
+ 875
+ 876
+ 877
+ 878
+ 879
+ 880
+ 881
+ 882
+ 883
+ 884
+ 885
+ 886
+ 887
+ 888
+ 889
+ 890
+ 891
+ 892
+ 893
+ 894
+ 895
+ 896
+ 897
+ 898
+ 899
+ 900
+ 901
+ 902
+ 903
+ 904
+ 905
+ 906
+ 907
+ 908
+ 909
+ 910
+ 911
+ 912
+ 913
+ 914
+ 915
+ 916
+ 917
+ 918
+ 919
+ 920
+ 921
+ 922
+ 923
+ 924
+ 925
+ 926
+ 927
+ 928
+ 929
+ 930
+ 931
+ 932
+ 933
+ 934
+ 935
+ 936
+ 937
+ 938
+ 939
+ 940
+ 941
+ 942
+ 943
+ 944
+ 945
+ 946
+ 947
+ 948
+ 949
+ 950
+ 951
+ 952
+ 953
+ 954
+ 955
+ 956
+ 957
+ 958
+ 959
+ 960
+ 961
+ 962
+ 963
+ 964
+ 965
+ 966
+ 967
+ 968
+ 969
+ 970
+ 971
+ 972
+ 973
+ 974
+ 975
+ 976
+ 977
+ 978
+ 979
+ 980
+ 981
+ 982
+ 983
+ 984
+ 985
+ 986
+ 987
+ 988
+ 989
+ 990
+ 991
+ 992
+ 993
+ 994
+ 995
+ 996
+ 997
+ 998
+ 999
+1000
+1001
+1002
+1003
+1004
+1005
+1006
+1007
+1008
+1009
+1010
+1011
+1012
+1013
+1014
+1015
+1016
+1017
+1018
+1019
+1020
+1021
+1022
+1023
+1024
+1025
+1026
+1027
+1028
+1029
+1030
+1031
+1032
+1033
+1034
+1035
+1036
+1037
+1038
+1039
+1040
+1041
+1042
+1043
+1044
+1045
+1046
+1047
+1048
+1049
+1050
+1051
+1052
+1053
+1054
+1055
+1056
+1057
+1058
+1059
+1060
+1061
+1062
+1063
+1064
+1065
+1066
+1067
+1068
+1069
+1070
+1071
+1072
+1073
+1074
+1075
+1076
+1077
+1078
+1079
+1080
+1081
+1082
+1083
+1084
+1085
+1086
+1087
+1088
+1089
+1090
+1091
+1092
+1093
+1094
+1095
+1096
+1097
+1098
+1099
+1100
+1101
+1102
+1103
+1104
+1105
+1106
+1107
+1108
+1109
+1110
+1111
+1112
+1113
+1114
+1115
+1116
+1117
+1118
+1119
+1120
+1121
+1122
+1123
+1124
+1125
+1126
+1127
+1128
+1129
+1130
+1131
+1132
+1133
+1134
+1135
+1136
+1137
+1138
+1139
+1140
+1141
+1142
+1143
+1144
+1145
+1146
+1147
+1148
+1149
+1150
+1151
+1152
+1153
+1154
+1155
+1156
+1157
+1158
+1159
+1160
+1161
+1162
+1163
+1164
+1165
+1166
+1167
+1168
+1169
+1170
+1171
+1172
+1173
+1174
+1175
+1176
+1177
+1178
+1179
+1180
+1181
+1182
+1183
+1184
+1185
+1186
+1187
+1188
+1189
+1190
+1191
+1192
+1193
+1194
+1195
+1196
+1197
+1198
+1199
+1200
+1201
+1202
+1203
+1204
+1205
+1206
+1207
+1208
+1209
+1210
+1211
+1212
+1213
+1214
+1215
+1216
+1217
+1218
+1219
+1220
+1221
+1222
+1223
+1224
+1225
+1226
+1227
+1228
+1229
+1230
+1231
+1232
+1233
+1234
+1235
+1236
+1237
+1238
+1239
+1240
+1241
+1242
+1243
+1244
+1245
+1246
+1247
+1248
+1249
+1250
+1251
+1252
+1253
+1254
+1255
+1256
+1257
+1258
+1259
+1260
+1261
+1262
+1263
+1264
+1265
+1266
+1267
+1268
+1269
+1270
+1271
+1272
+1273
+1274
+1275
+1276
+1277
+1278
+1279
+1280
+1281
+1282
+1283
+1284
+1285
+1286
+1287
+1288
+1289
+1290
+1291
+1292
+1293
+1294
+1295
+1296
+1297
+1298
+1299
+1300
+1301
+1302
+1303
+1304
+1305
+1306
+1307
+1308
+1309
+1310
+1311
+1312
+1313
+1314
+1315
+1316
+1317
+1318
+1319
+1320
+1321
+1322
+1323
+1324
+1325
+1326
+1327
+1328
+1329
+1330
+1331
+1332
+1333
+1334
+1335
+1336
+1337
+1338
+1339
+1340
+1341
+1342
+1343
+1344
+1345
+1346
+1347
+1348
+1349
+1350
+1351
+1352
+1353
+1354
+1355
+1356
+1357
+1358
+1359
+1360
+1361
+1362
+1363
+1364
+1365
+1366
+1367
+1368
+1369
+1370
+1371
+1372
+1373
+1374
+1375
+1376
+1377
+1378
+1379
+1380
+1381
+1382
+1383
+1384
+1385
+1386
+1387
+1388
+1389
+1390
+1391
+1392
+1393
+1394
+1395
+1396
+1397
+1398
+1399
+1400
+1401
+1402
+1403
+1404
+1405
+1406
+1407
+1408
+1409
+1410
+1411
+1412
+1413
+1414
+1415
+1416
+1417
+1418
+1419
+1420
+1421
+1422
+1423
+1424
+1425
+1426
+1427
+1428
+1429
+1430
+1431
+1432
+1433
+1434
+1435
+1436
+1437
+1438
+1439
+1440
+1441
+1442
+1443
+1444
+1445
+1446
+1447
+1448
+1449
+1450
+1451
+1452
+1453
+1454
+1455
+1456
+1457
+1458
+1459
+1460
+1461
+1462
+1463
+1464
+1465
+1466
+1467
+1468
+1469
+1470
+1471
+1472
+1473
+1474
+1475
+1476
+1477
+1478
+1479
+1480
+1481
+1482
+1483
+1484
+1485
+1486
+1487
+1488
+1489
+1490
+1491
+1492
+1493
+1494
+1495
+1496
+1497
+1498
+1499
+1500
+1501
+1502
+1503
+1504
+1505
+1506
+1507
+1508
+1509
+1510
+1511
+1512
+1513
+1514
+1515
+1516
+1517
+1518
+1519
+1520
+1521
+1522
+1523
+1524
+1525
+1526
+1527
+1528
+1529
+1530
+1531
+1532
+1533
+1534
+1535
+1536
+1537
+1538
+1539
+1540
+1541
+1542
+1543
+1544
+1545
+1546
+1547
+1548
+1549
+1550
+1551
+1552
+1553
+1554
+1555
+1556
+1557
+1558
+1559
+1560
+1561
+1562
+1563
+1564
+1565
+1566
+1567
+1568
+1569
+1570
+1571
+1572
+1573
+1574
+1575
+1576
+1577
+1578
+1579
+1580
+1581
+1582
+1583
+1584
+1585
+1586
+1587
+1588
+1589
+1590
+1591
+1592
+1593
+1594
+1595
+1596
+1597
+1598
+1599
+1600
+1601
+1602
+1603
+1604
+1605
+1606
+1607
+1608
+1609
+1610
+1611
+1612
+1613
+1614
+1615
+1616
+1617
+1618
+1619
+1620
+1621
+1622
+1623
+1624
+1625
+1626
+1627
+1628
+1629
+1630
+1631
+1632
+1633
+1634
+1635
+1636
+1637
+1638
+1639
+1640
+1641
+1642
+1643
+1644
+1645
+1646
+1647
+1648
+1649
+1650
+1651
+1652
+1653
+1654
+1655
+1656
+1657
+1658
+1659
+1660
+1661
+1662
+1663
+1664
+1665
+1666
+1667
+1668
+1669
+1670
+1671
+1672
+1673
+1674
+1675
+1676
+1677
+1678
+1679
+1680
+1681
+1682
+1683
+1684
+1685
+1686
+1687
+1688
+1689
+1690
+1691
+1692
+1693
+1694
+1695
+1696
+1697
+1698
+1699
+1700
+1701
+1702
+1703
+1704
+1705
+1706
+1707
+1708
+1709
+1710
+1711
+1712
+1713
+1714
+1715
+1716
+1717
+1718
+1719
+1720
+1721
+1722
+1723
+1724
+1725
+1726
+1727
+1728
+1729
+1730
+1731
+1732
+1733
+1734
+1735
+1736
+1737
+1738
+1739
+1740
+1741
+1742
+1743
+1744
+1745
+1746
+1747
+1748
+1749
+1750
+1751
+1752
+1753
+1754
+1755
+1756
+1757
+1758
+1759
+1760
+1761
+1762
+1763
+1764
+1765
+1766
+1767
+1768
+1769
+1770
+1771
+1772
+1773
+1774
+1775
+1776
+1777
+1778
+1779
+1780
+1781
+1782
+1783
+1784
+1785
+1786
+1787
+1788
+1789
+1790
+1791
+1792
+1793
+1794
+1795
+1796
+1797
+1798
+1799
+1800
+1801
+1802
+1803
+1804
+1805
+1806
+1807
+1808
+1809
+1810
+1811
+1812
+1813
+1814
+1815
+1816
+1817
+1818
+1819
+1820
+1821
+1822
+1823
+1824
+1825
+1826
+1827
+1828
+1829
+1830
+1831
+1832
+1833
+1834
+1835
+1836
+1837
+1838
+1839
+1840
+1841
+1842
+1843
+1844
+1845
+1846
+1847
+1848
+1849
+1850
+1851
+1852
+1853
+1854
+1855
+1856
+1857
+1858
+1859
+1860
+1861
+1862
+1863
+1864
+1865
+1866
+1867
+1868
+1869
+1870
+1871
+1872
+1873
+1874
+1875
+1876
+1877
+1878
+1879
+1880
+1881
+1882
+1883
+1884
+1885
+1886
+1887
+1888
+1889
+1890
+1891
+1892
+1893
+1894
+1895
+1896
+1897
+1898
+1899
+1900
+1901
+1902
+1903
+1904
+1905
+1906
+1907
+1908
+1909
+1910
+1911
+1912
+1913
+1914
+1915
+1916
+1917
+1918
+1919
+1920
+1921
+1922
+1923
+1924
+1925
+1926
+1927
+1928
+1929
+1930
+1931
+1932
+1933
+1934
+1935
+1936
+1937
+1938
+1939
+1940
+1941
+1942
+1943
+1944
+1945
+1946
+1947
+1948
+1949
+1950
+1951
+1952
+1953
+1954
+1955
+1956
+1957
+1958
+1959
+1960
+1961
+1962
+1963
+1964
+1965
+1966
+1967
+1968
+1969
+1970
+1971
+1972
+1973
+1974
+1975
+1976
+1977
+1978
+1979
+1980
+1981
+1982
+1983
+1984
+1985
+1986
+1987
+1988
+1989
+1990
+1991
+1992
+1993
+1994
+1995
+1996
+1997
+1998
+1999
+2000
+2001
+2002
+2003
+2004
+2005
+2006
+2007
+2008
+2009
+2010
+2011
+2012
+2013
+2014
+2015
+2016
+2017
+2018
+2019
+2020
+2021
+2022
+2023
+2024
+2025
+2026
+2027
+2028
+2029
+2030
+2031
+2032
+2033
+2034
+2035
+2036
+2037
+2038
+2039
+2040
+2041
+2042
+2043
+2044
+2045
+2046
+2047
+2048
+2049
+2050
+2051
+2052
+2053
+2054
+2055
+2056
+2057
+2058
+2059
+2060
+2061
+2062
+2063
+2064
+2065
+2066
+2067
+2068
+2069
+2070
+2071
+2072
+2073
+2074
+2075
+2076
+2077
+2078
+2079
+2080
+2081
+2082
+2083
+2084
+2085
+2086
+2087
+2088
+2089
+2090
+2091
+2092
+2093
+2094
+2095
+2096
+2097
+2098
+2099
+2100
+2101
+2102
+2103
+2104
+2105
+2106
+2107
+2108
+2109
+2110
+2111
+2112
+2113
+2114
+2115
+2116
+2117
+2118
+2119
+2120
+2121
+2122
+2123
+2124
+2125
+2126
+2127
+2128
+2129
+2130
+2131
+2132
+2133
+2134
+2135
+2136
+2137
+2138
+2139
+2140
+2141
+2142
+2143
+2144
+2145
+2146
+2147
+2148
+2149
+2150
+2151
+2152
+2153
+2154
+2155
+2156
+2157
+2158
+2159
+2160
+2161
+2162
+2163
+2164
+2165
+2166
+2167
+2168
+2169
+2170
+2171
+2172
+2173
+2174
+2175
+2176
+2177
+2178
+2179
+2180
+2181
+2182
+2183
+2184
+2185
+2186
+2187
+2188
+2189
+2190
+2191
+2192
+2193
+2194
+2195
+2196
+2197
+2198
+2199
+2200
+2201
+2202
+2203
+2204
+2205
+2206
+2207
+2208
+2209
+2210
+2211
+2212
+2213
+2214
+2215
+2216
+2217
+2218
+2219
+2220
+2221
+2222
+2223
+2224
+2225
+2226
+2227
+2228
+2229
+2230
+2231
+2232
+2233
+2234
+2235
+2236
+2237
+2238
+2239
+2240
+2241
+2242
+2243
+2244
+2245
+2246
+2247
+2248
+2249
+2250
+2251
+2252
+2253
+2254
+2255
+2256
+2257
+2258
+2259
+2260
+2261
+2262
+2263
+2264
+2265
+2266
+2267
+2268
+2269
+2270
+2271
+2272
+2273
+2274
+2275
+2276
+2277
+2278
+2279
+2280
+2281
+2282
+2283
+2284
+2285
+2286
+2287
+2288
+2289
+2290
+2291
+2292
+2293
+2294
+2295
+2296
+2297
+2298
+2299
+2300
+2301
+2302
+2303
+2304
+2305
+2306
+2307
+2308
+2309
+2310
+2311
+2312
+2313
+2314
+2315
+2316
+2317
+2318
+2319
+2320
+2321
+2322
+2323
+2324
+2325
+2326
+2327
+
+
+
+
+
+
+
+
+
+
+
+use std::cmp::{max, min};
+
+use unicode::regex::UNICODE_CLASSES;
+
+use {
+ Expr, Repeater, CharClass, ClassRange, CaptureIndex, CaptureName,
+ Error, ErrorKind, Result,
+};
+
+
+
+
+
+
+
+/// Parser state for a single regular expression pattern.
+///
+/// The pattern is held as a vector of `char`s together with the index of the
+/// current position, a stack of partially built expressions, and the flags
+/// currently in effect.
+#[derive(Debug)]
+pub struct Parser {
+ /// The pattern being parsed, one element per `char`.
+ chars: Vec<char>,
+ /// Index into `chars` of the current parse position.
+ chari: usize,
+ /// Expressions and open groups built so far, most recent last.
+ stack: Vec<Build>,
+ /// Number of capturing groups opened so far.
+ caps: usize,
+ /// Names of the named groups seen so far; used to reject duplicates.
+ names: Vec<String>,
+ /// Flags currently in effect.
+ flags: Flags,
+}
+
+
+
+
+
+
+
+/// An entry on the parser's stack: either a finished expression or the
+/// marker left behind by an opening parenthesis.
+#[derive(Debug)]
+enum Build {
+ /// A fully parsed expression.
+ Expr(Expr),
+ /// An opening parenthesis whose group has not been closed yet.
+ LeftParen {
+ /// Capture index of the group; `None` for groups opened with `(?flags:`.
+ i: CaptureIndex,
+ /// Capture name of the group, if it was opened with `(?P<name>`.
+ name: CaptureName,
+ /// Position of the opening parenthesis; reported for unclosed groups.
+ chari: usize,
+ /// Flags in effect when the group was opened; restored when it closes.
+ old_flags: Flags,
+ },
+}
+
+
+/// The flags that can be toggled inside a pattern with `(?flags)`.
+#[derive(Clone, Copy, Debug)]
+struct Flags {
+ /// `i`: recorded on literals and used to case fold character classes.
+ casei: bool,
+ /// `m`: makes `^` and `$` parse as line anchors instead of text anchors.
+ multi: bool,
+ /// `s`: makes `.` parse as `AnyChar` instead of `AnyCharNoNL`.
+ dotnl: bool,
+ /// `U`: inverts the greediness computed for repetition operators.
+ swap_greed: bool,
+ /// `x`: makes the character iterator skip whitespace and `#` comments.
+ ignore_space: bool,
+}
+
+
+impl Parser {
+    /// Parses the pattern `s` into an expression.
+    ///
+    /// Parsing starts at the first character with an empty stack, no capture
+    /// groups and every flag disabled.
+    pub fn parse(s: &str) -> Result<Expr> {
+        let initial_flags = Flags {
+            casei: false,
+            multi: false,
+            dotnl: false,
+            swap_greed: false,
+            ignore_space: false,
+        };
+        let parser = Parser {
+            chars: s.chars().collect(),
+            chari: 0,
+            stack: Vec::new(),
+            caps: 0,
+            names: Vec::new(),
+            flags: initial_flags,
+        };
+        parser.parse_expr()
+    }
+
+
+
+
+
+    /// Main parse loop: dispatches on the current character, pushes each
+    /// non-empty result onto the stack, and finally collapses the stack into
+    /// a single expression.
+    fn parse_expr(mut self) -> Result<Expr> {
+        while !self.eof() {
+            let built = match self.cur() {
+                '\\' => try!(self.parse_escape()),
+                '|' => {
+                    let alt = try!(self.alternate());
+                    self.bump();
+                    alt
+                }
+                '?' => try!(self.parse_simple_repeat(Repeater::ZeroOrOne)),
+                '*' => try!(self.parse_simple_repeat(Repeater::ZeroOrMore)),
+                '+' => try!(self.parse_simple_repeat(Repeater::OneOrMore)),
+                '{' => try!(self.parse_counted_repeat()),
+                '[' => {
+                    // Try a bare `[:name:]` class first, then a bracketed class.
+                    if let Some(cls) = self.maybe_parse_ascii() {
+                        Build::Expr(Expr::Class(cls))
+                    } else {
+                        try!(self.parse_class())
+                    }
+                }
+                '^' => {
+                    let anchor = if self.flags.multi {
+                        Expr::StartLine
+                    } else {
+                        Expr::StartText
+                    };
+                    self.parse_one(anchor)
+                }
+                '$' => {
+                    let anchor = if self.flags.multi {
+                        Expr::EndLine
+                    } else {
+                        Expr::EndText
+                    };
+                    self.parse_one(anchor)
+                }
+                '.' => {
+                    let any = if self.flags.dotnl {
+                        Expr::AnyChar
+                    } else {
+                        Expr::AnyCharNoNL
+                    };
+                    self.parse_one(any)
+                }
+                '(' => try!(self.parse_group()),
+                ')' => {
+                    let (restored, group) = try!(self.close_paren());
+                    // Consume the `)` under the group's own flags, then
+                    // restore the flags that were active before the group.
+                    self.bump();
+                    self.flags = restored;
+                    group
+                }
+                _ => {
+                    let c = self.bump();
+                    Build::Expr(Expr::Literal {
+                        chars: vec![c],
+                        casei: self.flags.casei,
+                    })
+                }
+            };
+            // Empty expressions (e.g. from `(?flags)`) are not pushed.
+            if built.is_empty() {
+                continue;
+            }
+            let built = self.maybe_class_case_fold(built);
+            self.stack.push(built);
+        }
+        self.finish_concat()
+    }
+
+
+
+
+
+    /// Parses an escape sequence. The parser must be positioned on the `\`.
+    ///
+    /// Escaped punctuation becomes a literal carrying the current `casei`
+    /// flag; the remaining escapes produce control-character literals,
+    /// assertions, octal/hex literals or character classes.
+    fn parse_escape(&mut self) -> Result<Build> {
+        fn lit(c: char) -> Build {
+            Build::Expr(Expr::Literal { chars: vec![c], casei: false })
+        }
+
+        self.bump();
+        if self.eof() {
+            return Err(self.err(ErrorKind::UnexpectedEscapeEof));
+        }
+        let c = self.cur();
+        if is_punct(c) {
+            let escaped = self.bump();
+            return Ok(Build::Expr(Expr::Literal {
+                chars: vec![escaped],
+                casei: self.flags.casei,
+            }));
+        }
+
+        // Escapes that denote a single control character.
+        let control = match c {
+            'a' => Some('\x07'),
+            'f' => Some('\x0C'),
+            't' => Some('\t'),
+            'n' => Some('\n'),
+            'r' => Some('\r'),
+            'v' => Some('\x0B'),
+            _ => None,
+        };
+        if let Some(ch) = control {
+            self.bump();
+            return Ok(lit(ch));
+        }
+
+        // Escapes that denote a zero-width assertion.
+        let assertion = match c {
+            'A' => Some(Expr::StartText),
+            'z' => Some(Expr::EndText),
+            'b' => Some(Expr::WordBoundary),
+            'B' => Some(Expr::NotWordBoundary),
+            _ => None,
+        };
+        if let Some(e) = assertion {
+            self.bump();
+            return Ok(Build::Expr(e));
+        }
+
+        match c {
+            // The octal parser reads the first digit itself, so no bump here.
+            '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7' => self.parse_octal(),
+            'x' => {
+                self.bump();
+                self.parse_hex()
+            }
+            'p'|'P' => {
+                self.bump();
+                let cls = try!(self.parse_unicode_class(c == 'P'));
+                Ok(Build::Expr(Expr::Class(cls)))
+            }
+            'd'|'s'|'w'|'D'|'S'|'W' => {
+                self.bump();
+                let cls = self.parse_perl_class(c);
+                Ok(Build::Expr(Expr::Class(cls)))
+            }
+            other => Err(self.err(ErrorKind::UnrecognizedEscape(other))),
+        }
+    }
+
+
+
+
+
+
+
+
+
+
+    /// Parses the opening of a group. The parser must be positioned on `(`.
+    ///
+    /// `(?P<name>` opens a named capture (duplicate names are an error), any
+    /// other `(?` is handed to the flag parser, and a plain `(` opens an
+    /// unnamed capture. Capturing groups get the next capture index.
+    fn parse_group(&mut self) -> Result<Build> {
+        let open_at = self.chari;
+        self.bump();
+        let name: CaptureName = if self.bump_if("?P<") {
+            let n = try!(self.parse_group_name());
+            if self.names.iter().any(|seen| *seen == n) {
+                return Err(self.err(ErrorKind::DuplicateCaptureName(n)));
+            }
+            self.names.push(n.clone());
+            Some(n)
+        } else if self.bump_if("?") {
+            // Flags (with or without a non-capturing group) follow.
+            return self.parse_group_flags(open_at);
+        } else {
+            None
+        };
+        self.caps = checkadd(self.caps, 1);
+        Ok(Build::LeftParen {
+            i: Some(self.caps),
+            name: name,
+            chari: open_at,
+            old_flags: self.flags,
+        })
+    }
+
+
+
+
+
+
+
+
+
+
+    /// Parses the flags following `(?`.
+    ///
+    /// Flags before a `-` are enabled and flags after it are disabled. A `)`
+    /// ends a bare flag directive and yields an empty expression; a `:` opens
+    /// a non-capturing group that remembers the flags that were in effect
+    /// before the directive. `opening_chari` is the position of the `(`.
+    fn parse_group_flags(&mut self, opening_chari: usize) -> Result<Build> {
+        let saved = self.flags;
+        let mut enable = true;
+        let mut seen_flag = false;
+        while !self.eof() {
+            let c = self.cur();
+            match c {
+                'i' => self.flags.casei = enable,
+                'm' => self.flags.multi = enable,
+                's' => self.flags.dotnl = enable,
+                'U' => self.flags.swap_greed = enable,
+                'x' => self.flags.ignore_space = enable,
+                '-' => {
+                    if !enable {
+                        // A second `-` in the same directive.
+                        return Err(self.err(ErrorKind::DoubleFlagNegation));
+                    }
+                    enable = false;
+                }
+                ')' => {
+                    if !seen_flag {
+                        return Err(self.err(ErrorKind::EmptyFlagNegation));
+                    }
+                    self.bump();
+                    return Ok(Build::Expr(Expr::Empty));
+                }
+                ':' => {
+                    if !(enable || seen_flag) {
+                        // A `-` that is not followed by any flag.
+                        return Err(self.err(ErrorKind::EmptyFlagNegation));
+                    }
+                    self.bump();
+                    return Ok(Build::LeftParen {
+                        i: None,
+                        name: None,
+                        chari: opening_chari,
+                        old_flags: saved,
+                    });
+                }
+                other => {
+                    return Err(self.err(ErrorKind::UnrecognizedFlag(other)));
+                }
+            }
+            // Only flag letters and `-` reach this point: a flag letter marks
+            // that a flag was seen, a `-` resets that marker.
+            seen_flag = c != '-';
+            self.bump();
+        }
+        Err(self.err(ErrorKind::UnexpectedFlagEof))
+    }
+
+
+
+
+
+    /// Parses a capture group name up to and including the closing `>`.
+    ///
+    /// The name must be non-empty, must not start with a digit, and may only
+    /// contain characters accepted by `is_valid_capture_char`.
+    fn parse_group_name(&mut self) -> Result<String> {
+        let mut name = String::new();
+        loop {
+            if self.eof() {
+                // Ran out of input before finding `>`.
+                return Err(self.err(ErrorKind::UnclosedCaptureName(name)));
+            }
+            if self.peek_is('>') {
+                break;
+            }
+            name.push(self.bump());
+        }
+        let first = match name.chars().next() {
+            Some(c) => c,
+            None => return Err(self.err(ErrorKind::EmptyCaptureName)),
+        };
+        let starts_with_digit = first >= '0' && first <= '9';
+        if starts_with_digit || !name.chars().all(is_valid_capture_char) {
+            return Err(self.err(ErrorKind::InvalidCaptureName(name)));
+        }
+        // Consume the closing `>`.
+        self.bump();
+        Ok(name)
+    }
+
+
+
+
+
+    /// Parses a counted repetition (`{n}`, `{n,}` or `{n,m}`) applied to the
+    /// expression on top of the stack. The parser must be positioned on `{`.
+    ///
+    /// A trailing `?` flips greediness, as does the `swap_greed` flag.
+    fn parse_counted_repeat(&mut self) -> Result<Build> {
+        let operand = try!(self.pop(ErrorKind::RepeaterExpectsExpr));
+        if !operand.can_repeat() {
+            return Err(self.err(ErrorKind::RepeaterUnexpectedExpr(operand)));
+        }
+        self.bump();
+        let lo = try!(self.parse_decimal(|c| c != ',' && c != '}'));
+        let hi = if !self.bump_if(',') {
+            // `{n}`: exactly n repetitions.
+            Some(lo)
+        } else if self.peek_is('}') {
+            // `{n,}`: no upper bound.
+            None
+        } else {
+            let n = try!(self.parse_decimal(|c| c != '}'));
+            if lo > n {
+                return Err(self.err(ErrorKind::InvalidRepeatRange {
+                    min: lo,
+                    max: n,
+                }));
+            }
+            Some(n)
+        };
+        if !self.bump_if('}') {
+            return Err(self.err(ErrorKind::UnclosedRepeat));
+        }
+        let lazy = self.bump_if('?');
+        Ok(Build::Expr(Expr::Repeat {
+            e: Box::new(operand),
+            r: Repeater::Range { min: lo, max: hi },
+            greedy: !lazy ^ self.flags.swap_greed,
+        }))
+    }
+
+
+
+
+
+
+
+
+    /// Applies the repetition operator `rep` (`?`, `*` or `+`) to the
+    /// expression on top of the stack. The parser must be positioned on the
+    /// operator character.
+    ///
+    /// A trailing `?` flips greediness, as does the `swap_greed` flag.
+    fn parse_simple_repeat(&mut self, rep: Repeater) -> Result<Build> {
+        let operand = try!(self.pop(ErrorKind::RepeaterExpectsExpr));
+        if !operand.can_repeat() {
+            return Err(self.err(ErrorKind::RepeaterUnexpectedExpr(operand)));
+        }
+        self.bump();
+        let lazy = self.bump_if('?');
+        // Greedy unless exactly one of "lazy suffix" and "swap_greed" is set.
+        let greedy = lazy == self.flags.swap_greed;
+        Ok(Build::Expr(Expr::Repeat {
+            e: Box::new(operand),
+            r: rep,
+            greedy: greedy,
+        }))
+    }
+
+
+
+
+
+    /// Parses a base-10 number from the characters accepted by `until`.
+    ///
+    /// Surrounding whitespace is trimmed before parsing. Returns
+    /// `MissingBase10` when `until` accepts nothing, and `InvalidBase10` when
+    /// the text is not made up solely of ASCII digits or does not fit in a
+    /// `u32`.
+    fn parse_decimal<B: Bumpable>(&mut self, until: B) -> Result<u32> {
+        let raw = match self.bump_get(until) {
+            Some(raw) => raw,
+            None => return Err(self.err(ErrorKind::MissingBase10)),
+        };
+        let digits = raw.trim();
+        // `from_str_radix` tolerates a leading `+`; a repetition count such
+        // as `{+3}` must be rejected, so check the digits explicitly first.
+        let parsed = if digits.chars().all(|c| c.is_digit(10)) {
+            u32::from_str_radix(digits, 10).ok()
+        } else {
+            None
+        };
+        match parsed {
+            Some(n) => Ok(n),
+            None => Err(self.err(ErrorKind::InvalidBase10(digits.into()))),
+        }
+    }
+
+
+
+
+
+    /// Parses an octal escape of up to three digits into a literal.
+    ///
+    /// The parser must be positioned on the first octal digit; the caller
+    /// only dispatches here when the current character is `0` through `7`.
+    fn parse_octal(&mut self) -> Result<Build> {
+        use std::char;
+
+        let mut seen = 0;
+        let digits = self
+            .bump_get(|c| {
+                seen += 1;
+                seen <= 3 && c >= '0' && c <= '7'
+            })
+            .expect("octal string");
+        // At most three octal digits, so the value is at most 0o777.
+        let value = u32::from_str_radix(&digits, 8)
+            .ok()
+            .expect("valid octal number");
+        let ch = char::from_u32(value).expect("Unicode scalar value");
+        Ok(Build::Expr(Expr::Literal {
+            chars: vec![ch],
+            casei: self.flags.casei,
+        }))
+    }
+
+
+
+
+
+
+
+
+
+
+    /// Parses the body of a hex escape, after `\x` has been consumed.
+    ///
+    /// `{` selects the braced, variable-length form; anything else is parsed
+    /// as exactly two digits.
+    fn parse_hex(&mut self) -> Result<Build> {
+        match self.bump_if('{') {
+            true => self.parse_hex_many_digits(),
+            false => self.parse_hex_two_digits(),
+        }
+    }
+
+
+
+
+
+    /// Parses the braced form of a hex escape, after `\x{` has been consumed.
+    ///
+    /// Returns `InvalidBase16` when the text before `}` is not made up solely
+    /// of hex digits (or does not fit in a `u32`), `InvalidScalarValue` when
+    /// the number is not a Unicode scalar value, and `UnclosedHex` when the
+    /// closing `}` is missing.
+    fn parse_hex_many_digits(&mut self) -> Result<Build> {
+        use std::char;
+
+        let s = self.bump_get(|c| c != '}').unwrap_or_else(String::new);
+        // `from_str_radix` tolerates a leading `+`, which is not a hex
+        // digit, so validate the digits explicitly before converting.
+        let parsed = if s.chars().all(|c| c.is_digit(16)) {
+            u32::from_str_radix(&s, 16).ok()
+        } else {
+            None
+        };
+        // Errors are built only on the failing path: constructing one copies
+        // a window of the pattern.
+        let n = match parsed {
+            Some(n) => n,
+            None => return Err(self.err(ErrorKind::InvalidBase16(s))),
+        };
+        let c = match char::from_u32(n) {
+            Some(c) => c,
+            None => return Err(self.err(ErrorKind::InvalidScalarValue(n))),
+        };
+        if !self.bump_if('}') {
+            return Err(self.err(ErrorKind::UnclosedHex));
+        }
+        Ok(Build::Expr(Expr::Literal {
+            chars: vec![c],
+            casei: self.flags.casei,
+        }))
+    }
+
+
+
+
+
+    /// Parses the two-digit form of a hex escape, after `\x` has been
+    /// consumed.
+    ///
+    /// Returns `UnexpectedTwoDigitHexEof` when fewer than two characters
+    /// remain and `InvalidBase16` when they are not both hex digits.
+    fn parse_hex_two_digits(&mut self) -> Result<Build> {
+        use std::char;
+
+        let mut i = 0;
+        let s = self.bump_get(|_| { i += 1; i <= 2 })
+            .unwrap_or_else(String::new);
+        // Count characters, not bytes: a single multi-byte character at the
+        // end of the pattern must not be mistaken for two digits.
+        if s.chars().count() < 2 {
+            return Err(self.err(ErrorKind::UnexpectedTwoDigitHexEof));
+        }
+        // `from_str_radix` tolerates a leading `+`, which is not a hex
+        // digit, so validate the digits explicitly before converting.
+        let parsed = if s.chars().all(|c| c.is_digit(16)) {
+            u32::from_str_radix(&s, 16).ok()
+        } else {
+            None
+        };
+        let n = match parsed {
+            Some(n) => n,
+            None => return Err(self.err(ErrorKind::InvalidBase16(s))),
+        };
+        Ok(Build::Expr(Expr::Literal {
+            // Two hex digits are at most 0xFF, always a valid scalar value.
+            chars: vec![char::from_u32(n).expect("Unicode scalar value")],
+            casei: self.flags.casei,
+        }))
+    }
+
+
+
+
+
+ /// Parses a bracketed character class. The first `bump` consumes the
+ /// character the parser is positioned on (the caller dispatches here on `[`).
+ ///
+ /// A leading `^` negates the class and leading `-` characters are taken
+ /// literally. The resulting class is canonicalized before it is returned.
+ fn parse_class(&mut self) -> Result<Build> {
+ self.bump();
+ let negated = self.bump_if('^');
+ let mut class = CharClass::empty();
+ while self.bump_if('-') {
+ class.ranges.push(ClassRange::one('-'));
+ }
+ loop {
+ if self.eof() {
+ // The class was never closed with `]`.
+ return Err(self.err(ErrorKind::UnexpectedClassEof));
+ }
+ match self.cur() {
+ // `]` closes the class only once at least one range has been
+ // added; otherwise it falls through to the last arm and is
+ // treated as the start of a range.
+ ']' if class.len() > 0 => { self.bump(); break }
+ '[' => match self.maybe_parse_ascii() {
+ Some(class2) => class.ranges.extend(class2),
+ None => {
+ self.bump();
+ try!(self.parse_class_range(&mut class, '['))
+ }
+ },
+ '\\' => match try!(self.parse_escape()) {
+ Build::Expr(Expr::Class(class2)) => {
+ class.ranges.extend(class2);
+ }
+ Build::Expr(Expr::Literal { chars, .. }) => {
+ try!(self.parse_class_range(&mut class, chars[0]));
+ }
+ Build::Expr(e) => {
+ let err = ErrorKind::InvalidClassEscape(e);
+ return Err(self.err(err));
+ }
+ // `parse_escape` only ever returns `Build::Expr`.
+ _ => unreachable!(),
+ },
+ start => {
+ self.bump();
+ try!(self.parse_class_range(&mut class, start));
+ }
+ }
+ }
+ if negated {
+ class = class.negate();
+ }
+ Ok(Build::Expr(Expr::Class(class.canonicalize())))
+ }
+
+
+
+
+
+
+
+
+
+
+
+    /// Adds to `class` the single character `start` or, when a `-` follows,
+    /// the range from `start` to the character after the `-`.
+    ///
+    /// A `-` directly before the closing `]` is taken literally. The end of a
+    /// range may be written as an escape, provided it denotes a literal.
+    fn parse_class_range(&mut self, class: &mut CharClass, start: char)
+                        -> Result<()> {
+        if !self.bump_if('-') {
+            // Not a range, just the one character.
+            class.ranges.push(ClassRange::one(start));
+            return Ok(());
+        }
+        if self.eof() {
+            // The pattern ended right after the `-`.
+            return Err(self.err(ErrorKind::UnexpectedClassEof));
+        }
+        if self.peek_is(']') {
+            // `x-]`: both `x` and `-` are literal members; the `]` is left
+            // for the caller.
+            class.ranges.push(ClassRange::one(start));
+            class.ranges.push(ClassRange::one('-'));
+            return Ok(());
+        }
+
+        let end = if self.cur() == '\\' {
+            match try!(self.parse_escape()) {
+                Build::Expr(Expr::Literal { chars, .. }) => chars[0],
+                Build::Expr(other) => {
+                    let kind = ErrorKind::InvalidClassEscape(other);
+                    return Err(self.err(kind));
+                }
+                // `parse_escape` only ever returns `Build::Expr`.
+                _ => unreachable!(),
+            }
+        } else {
+            self.bump()
+        };
+        if start > end {
+            return Err(self.err(ErrorKind::InvalidClassRange {
+                start: start,
+                end: end,
+            }));
+        }
+        class.ranges.push(ClassRange::new(start, end));
+        Ok(())
+    }
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+    /// Tries to parse a named ASCII class of the form `[:name:]` or
+    /// `[:^name:]` at the current position.
+    ///
+    /// Returns `None`, with the position rewound to where it was, when the
+    /// input does not have that shape or the name is not a known class.
+    fn maybe_parse_ascii(&mut self) -> Option<CharClass> {
+        fn attempt(p: &mut Parser) -> Option<CharClass> {
+            p.bump();
+            if !p.bump_if(':') {
+                return None;
+            }
+            let negated = p.bump_if('^');
+            let name = match p.bump_get(|c| c != ':') {
+                Some(name) => name,
+                None => return None,
+            };
+            if !p.bump_if(":]") {
+                return None;
+            }
+            match ascii_class(&name) {
+                Some(cls) => Some(if negated { cls.negate() } else { cls }),
+                None => None,
+            }
+        }
+
+        let rewind_to = self.chari;
+        let parsed = attempt(self);
+        if parsed.is_none() {
+            self.chari = rewind_to;
+        }
+        parsed
+    }
+
+
+
+
+
+
+
+
+
+
+
+
+    /// Parses the name of a Unicode class, after `\p` or `\P` has been
+    /// consumed, and returns the class (negated when `neg` is true).
+    ///
+    /// The name is either a single character or a `{...}`-delimited string.
+    fn parse_unicode_class(&mut self, neg: bool) -> Result<CharClass> {
+        let name = if !self.bump_if('{') {
+            if self.eof() {
+                // The pattern ended right after `\p` / `\P`.
+                return Err(self.err(ErrorKind::UnexpectedEscapeEof));
+            }
+            self.bump().to_string()
+        } else {
+            let braced = self.bump_get(|c| c != '}').unwrap_or(String::new());
+            if braced.is_empty() || !self.bump_if('}') {
+                return Err(self.err(ErrorKind::UnclosedUnicodeName));
+            }
+            braced
+        };
+        let cls = match unicode_class(&name) {
+            Some(cls) => cls,
+            None => {
+                let kind = ErrorKind::UnrecognizedUnicodeClass(name);
+                return Err(self.err(kind));
+            }
+        };
+        Ok(if neg { cls.negate() } else { cls })
+    }
+
+
+
+
+
+
+    /// Returns the Perl character class named by `name`, one of `d`, `s`,
+    /// `w` or their upper-case (negated) forms.
+    ///
+    /// Panics on any other character; the caller only passes those six.
+    fn parse_perl_class(&mut self, name: char) -> CharClass {
+        use unicode::regex::{PERLD, PERLS, PERLW};
+
+        let negated = match name {
+            'd' | 's' | 'w' => false,
+            'D' | 'S' | 'W' => true,
+            _ => unreachable!(),
+        };
+        let cls = match name {
+            'd' | 'D' => raw_class_to_expr(PERLD),
+            's' | 'S' => raw_class_to_expr(PERLS),
+            _ => raw_class_to_expr(PERLW),
+        };
+        if negated { cls.negate() } else { cls }
+    }
+
+
+
+
+
+
+    /// Consumes the current character and wraps `e` as a built expression.
+    fn parse_one(&mut self, e: Expr) -> Build {
+        let built = Build::Expr(e);
+        self.bump();
+        built
+    }
+}
+
+
+impl Parser {
+ fn chars(&self) -> Chars {
+ Chars::new(&self.chars[self.chari..], self.flags.ignore_space)
+ }
+
+ fn bump(&mut self) -> char {
+ let c = self.cur();
+ self.chari = checkadd(self.chari, self.chars().next_count());
+ c
+ }
+
+ fn cur(&self) -> char { self.chars().next().unwrap() }
+
+ fn eof(&self) -> bool { self.chars().next().is_none() }
+
+ fn bump_get<B: Bumpable>(&mut self, s: B) -> Option<String> {
+ let n = s.match_end(self);
+ if n == 0 {
+ None
+ } else {
+ let end = checkadd(self.chari, n);
+ let s = self.chars[self.chari..end]
+ .iter().cloned().collect::<String>();
+ self.chari = end;
+ Some(s)
+ }
+ }
+
+ fn bump_if<B: Bumpable>(&mut self, s: B) -> bool {
+ let n = s.match_end(self);
+ if n == 0 {
+ false
+ } else {
+ self.chari = checkadd(self.chari, n);
+ true
+ }
+ }
+
+ fn peek_is<B: Bumpable>(&self, s: B) -> bool {
+ s.match_end(self) > 0
+ }
+
+ fn err(&self, kind: ErrorKind) -> Error {
+ self.errat(self.chari, kind)
+ }
+
+ fn errat(&self, pos: usize, kind: ErrorKind) -> Error {
+ Error { pos: pos, surround: self.windowat(pos), kind: kind }
+ }
+
+ fn windowat(&self, pos: usize) -> String {
+ let s = max(5, pos) - 5;
+ let e = min(self.chars.len(), checkadd(pos, 5));
+ self.chars[s..e].iter().cloned().collect()
+ }
+
+ fn pop(&mut self, expected: ErrorKind) -> Result<Expr> {
+ match self.stack.pop() {
+ None | Some(Build::LeftParen{..}) => Err(self.err(expected)),
+ Some(Build::Expr(e)) => Ok(e),
+ }
+ }
+
+
+
+
+
+
+ fn maybe_class_case_fold(&mut self, bexpr: Build) -> Build {
+ match bexpr {
+ Build::Expr(Expr::Class(cls)) => {
+ Build::Expr(Expr::Class(
+ if self.flags.casei && !cls.casei {
+ cls.case_fold()
+ } else {
+ cls
+ }
+ ))
+ }
+ bexpr => bexpr,
+ }
+ }
+}
+
+/// An iterator over a slice of pattern characters that can optionally skip
+/// whitespace and `#` comments.
+struct Chars<'a> {
+ /// The characters being iterated over.
+ chars: &'a [char],
+ /// Index into `chars` of the next character to look at.
+ cur: usize,
+ /// When true, whitespace and comments are skipped.
+ ignore_space: bool,
+ /// True while the iterator is inside a `#` comment.
+ in_comment: bool,
+}
+
+impl<'a> Iterator for Chars<'a> {
+    type Item = char;
+
+    /// Skips anything that should be ignored, then yields the next
+    /// character, or `None` once the slice is exhausted.
+    fn next(&mut self) -> Option<char> {
+        self.skip();
+        if self.cur >= self.chars.len() {
+            return None;
+        }
+        let c = self.chars[self.cur];
+        self.cur = checkadd(self.cur, 1);
+        Some(c)
+    }
+}
+
+impl<'a> Chars<'a> {
+    /// Creates an iterator positioned at the start of `chars`, outside of
+    /// any comment.
+    fn new(chars: &[char], ignore_space: bool) -> Chars {
+        Chars {
+            in_comment: false,
+            ignore_space: ignore_space,
+            cur: 0,
+            chars: chars,
+        }
+    }
+
+    /// Advances past whitespace and `#` comments when `ignore_space` is set;
+    /// does nothing otherwise. A comment ends at a newline.
+    ///
+    /// Outside a comment, a backslash directly followed by `#` is stepped
+    /// over so that the `#` becomes the next character.
+    ///
+    /// NOTE(review): this check does not know whether the backslash is itself
+    /// the second half of an escape, so `\\#` appears to yield `#` as well;
+    /// confirm whether that is intended.
+    fn skip(&mut self) {
+        if !self.ignore_space {
+            return;
+        }
+        while self.cur < self.chars.len() {
+            let here = self.c();
+            let after = checkadd(self.cur, 1);
+            if !self.in_comment {
+                let escaped_hash = here == '\\'
+                    && after < self.chars.len()
+                    && self.chars[after] == '#';
+                if escaped_hash {
+                    self.cur = after;
+                    return;
+                }
+                if here == '#' {
+                    self.in_comment = true;
+                }
+            } else if here == '\n' {
+                self.in_comment = false;
+            }
+
+            if !(self.in_comment || here.is_whitespace()) {
+                return;
+            }
+            self.cur = after;
+        }
+    }
+
+    /// Returns the character at the current index, without any skipping.
+    fn c(&self) -> char {
+        let i = self.cur;
+        self.chars[i]
+    }
+
+    /// Advances by one character and returns the index reached, i.e. the
+    /// number of slice elements consumed so far, skipped ones included.
+    fn next_count(&mut self) -> usize {
+        let _ = self.next();
+        self.cur
+    }
+}
+
+
+impl Parser {
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+    /// Handles a `|`: pops expressions until the start of the current
+    /// alternation (an existing `Alternate`, an open parenthesis or the
+    /// bottom of the stack), concatenates them and returns an `Alternate`
+    /// with that concatenation appended as its latest branch.
+    ///
+    /// An open parenthesis is put back on the stack. An empty branch is an
+    /// `EmptyAlternate` error.
+    fn alternate(&mut self) -> Result<Build> {
+        let mut concat = vec![];
+        loop {
+            let mut branches = match self.stack.pop() {
+                Some(Build::Expr(Expr::Alternate(es))) => es,
+                Some(Build::Expr(e)) => {
+                    concat.push(e);
+                    continue;
+                }
+                // Only `LeftParen` remains: the group stays open.
+                Some(paren) => {
+                    self.stack.push(paren);
+                    vec![]
+                }
+                None => vec![],
+            };
+            if concat.is_empty() {
+                return Err(self.err(ErrorKind::EmptyAlternate));
+            }
+            branches.push(rev_concat(concat));
+            return Ok(Build::Expr(Expr::Alternate(branches)));
+        }
+    }
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+    /// Handles a `)`: pops expressions back to the matching open parenthesis
+    /// and wraps them in a group.
+    ///
+    /// Returns the group together with the flags that were in effect when
+    /// the parenthesis was opened. If an `Alternate` is met on the way down,
+    /// the popped expressions become its last branch and the open
+    /// parenthesis is expected directly beneath it.
+    fn close_paren(&mut self) -> Result<(Flags, Build)> {
+        let mut concat = vec![];
+        loop {
+            let (inner, i, name, old_flags) = match self.stack.pop() {
+                // No matching open parenthesis.
+                None => return Err(self.err(ErrorKind::UnopenedParen)),
+                Some(Build::Expr(Expr::Alternate(mut es))) => {
+                    if concat.is_empty() {
+                        return Err(self.err(ErrorKind::EmptyAlternate));
+                    }
+                    es.push(rev_concat(concat));
+                    match self.stack.pop() {
+                        None => {
+                            return Err(self.err(ErrorKind::UnopenedParen));
+                        }
+                        Some(Build::Expr(_)) => unreachable!(),
+                        Some(Build::LeftParen { i, name, old_flags, .. }) => {
+                            (Expr::Alternate(es), i, name, old_flags)
+                        }
+                    }
+                }
+                Some(Build::Expr(e)) => {
+                    concat.push(e);
+                    continue;
+                }
+                Some(Build::LeftParen { i, name, old_flags, .. }) => {
+                    if concat.is_empty() {
+                        return Err(self.err(ErrorKind::EmptyGroup));
+                    }
+                    (rev_concat(concat), i, name, old_flags)
+                }
+            };
+            let group = Expr::Group { e: Box::new(inner), i: i, name: name };
+            return Ok((old_flags, Build::Expr(group)));
+        }
+    }
+
+
+
+
+
+
+
+
+
+
+
+
+    /// Collapses whatever is left on the stack into the final expression
+    /// once the whole pattern has been consumed.
+    ///
+    /// A remaining open parenthesis is an `UnclosedParen` error reported at
+    /// the position of that parenthesis.
+    fn finish_concat(&mut self) -> Result<Expr> {
+        let mut concat = vec![];
+        while let Some(top) = self.stack.pop() {
+            match top {
+                Build::LeftParen { chari, .. } => {
+                    return Err(self.errat(chari, ErrorKind::UnclosedParen));
+                }
+                Build::Expr(Expr::Alternate(mut es)) => {
+                    if concat.is_empty() {
+                        // The pattern ends with an empty branch.
+                        return Err(self.err(ErrorKind::EmptyAlternate));
+                    }
+                    es.push(rev_concat(concat));
+                    return Ok(Expr::Alternate(es));
+                }
+                Build::Expr(e) => concat.push(e),
+            }
+        }
+        Ok(rev_concat(concat))
+    }
+}
+
+impl Build {
+    /// Returns true only for a built `Expr::Empty`.
+    fn is_empty(&self) -> bool {
+        if let Build::Expr(Expr::Empty) = *self {
+            true
+        } else {
+            false
+        }
+    }
+}
+
+
+
+/// Something that can be matched against the parser's upcoming input.
+trait Bumpable {
+ /// Returns the number of elements of the parser's character buffer,
+ /// counted from its current position, that the match spans. `0` means
+ /// that there is no match.
+ fn match_end(self, p: &Parser) -> usize;
+}
+
+impl Bumpable for char {
+    /// Matches when the next character of the input equals `self`.
+    fn match_end(self, p: &Parser) -> usize {
+        let mut chars = p.chars();
+        match chars.next() {
+            Some(c) => if c == self { chars.cur } else { 0 },
+            None => 0,
+        }
+    }
+}
+
+impl<'a> Bumpable for &'a str {
+    /// Matches when the upcoming input starts with every character of
+    /// `self`, in order. An empty string never matches.
+    fn match_end(self, p: &Parser) -> usize {
+        let mut rest = p.chars();
+        let mut end = 0;
+        for want in self.chars() {
+            match rest.next() {
+                Some(got) => {
+                    if got != want {
+                        return 0;
+                    }
+                    end = rest.cur;
+                }
+                // The input ran out before the whole string was seen.
+                None => return 0,
+            }
+        }
+        end
+    }
+}
+
+impl<F: FnMut(char) -> bool> Bumpable for F {
+    /// Matches the longest run of upcoming characters for which the
+    /// predicate returns true.
+    fn match_end(mut self, p: &Parser) -> usize {
+        let mut chars = p.chars();
+        let mut end = 0;
+        loop {
+            let c = match chars.next() {
+                Some(c) => c,
+                None => break,
+            };
+            if !self(c) {
+                break;
+            }
+            end = chars.cur;
+        }
+        end
+    }
+}
+
+
+
+/// Turns expressions collected in reverse (popped off the stack) into one
+/// expression: `Empty` for none, the expression itself for exactly one, and
+/// otherwise a `Concat` in the original order.
+fn rev_concat(mut exprs: Vec<Expr>) -> Expr {
+    match exprs.len() {
+        0 => Expr::Empty,
+        1 => exprs.pop().unwrap(),
+        _ => {
+            exprs.reverse();
+            Expr::Concat(exprs)
+        }
+    }
+}
+
+
+
+/// Returns true if `c` may appear in a capture group name: an ASCII letter,
+/// an ASCII digit or an underscore.
+fn is_valid_capture_char(c: char) -> bool {
+    let is_digit = '0' <= c && c <= '9';
+    let is_lower = 'a' <= c && c <= 'z';
+    let is_upper = 'A' <= c && c <= 'Z';
+    match c {
+        '_' => true,
+        _ => is_digit || is_lower || is_upper,
+    }
+}
+
+
+/// Returns true if `c` is one of the punctuation characters that an escape
+/// turns into a literal.
+#[doc(hidden)]
+pub fn is_punct(c: char) -> bool {
+    const ESCAPABLE: &'static str = "\\.+*?()|[]{}^$#";
+    ESCAPABLE.chars().any(|p| p == c)
+}
+
+/// Adds two offsets, panicking with "regex length overflow" if the sum does
+/// not fit in a `usize`.
+fn checkadd(x: usize, y: usize) -> usize {
+    match x.checked_add(y) {
+        Some(sum) => sum,
+        None => panic!("regex length overflow"),
+    }
+}
+
+/// Looks up a Unicode class by name with a binary search over
+/// `UNICODE_CLASSES`; returns `None` for an unknown name.
+fn unicode_class(name: &str) -> Option<CharClass> {
+    match UNICODE_CLASSES.binary_search_by(|&(s, _)| s.cmp(name)) {
+        Ok(i) => Some(raw_class_to_expr(UNICODE_CLASSES[i].1)),
+        Err(_) => None,
+    }
+}
+
+/// Looks up a named ASCII class with a binary search over `ASCII_CLASSES`;
+/// returns `None` for an unknown name.
+fn ascii_class(name: &str) -> Option<CharClass> {
+    match ASCII_CLASSES.binary_search_by(|&(s, _)| s.cmp(name)) {
+        Ok(i) => Some(raw_class_to_expr(ASCII_CLASSES[i].1)),
+        Err(_) => None,
+    }
+}
+
+/// Converts a table of `(start, end)` pairs into a `CharClass`, keeping the
+/// pairs in the order given.
+fn raw_class_to_expr(raw: &[(char, char)]) -> CharClass {
+    let mut ranges = Vec::with_capacity(raw.len());
+    for &(s, e) in raw {
+        ranges.push(ClassRange { start: s, end: e });
+    }
+    CharClass::new(ranges)
+}
+
+type Class = &'static [(char, char)];
+type NamedClasses = &'static [(&'static str, Class)];
+
+ const ASCII_CLASSES: NamedClasses = &[
+ // Named ASCII classes, keyed by the name written between `[:` and `:]`.
+ // Entries must stay sorted by name: `ascii_class` uses a binary search.
+ // [:alnum:]   alphanumeric     (== [0-9A-Za-z])
+ // [:alpha:]   alphabetic       (== [A-Za-z])
+ // [:ascii:]   ASCII            (== [\x00-\x7F])
+ // [:blank:]   blank            (== [ \t])
+ // [:cntrl:]   control          (== [\x00-\x1F\x7F])
+ // [:digit:]   digits           (== [0-9])
+ // [:graph:]   graphical        (== [!-~])
+ // [:lower:]   lower case       (== [a-z])
+ // [:print:]   printable        (== [ -~])
+ // [:punct:]   punctuation      (== [!-/:-@[-`{-~])
+ // [:space:]   whitespace       (== [\t\n\x0B\x0C\r ])
+ // [:upper:]   upper case       (== [A-Z])
+ // [:word:]    word characters  (== [0-9A-Za-z_])
+ // [:xdigit:]  hex digit        (== [0-9A-Fa-f])
+ ("alnum", &ALNUM),
+ ("alpha", &ALPHA),
+ ("ascii", &ASCII),
+ ("blank", &BLANK),
+ ("cntrl", &CNTRL),
+ ("digit", &DIGIT),
+ ("graph", &GRAPH),
+ ("lower", &LOWER),
+ ("print", &PRINT),
+ ("punct", &PUNCT),
+ ("space", &SPACE),
+ ("upper", &UPPER),
+ ("word", &WORD),
+ ("xdigit", &XDIGIT),
+];
+
+ const ALNUM: Class = &[('0', '9'), ('A', 'Z'), ('a', 'z')]; // digits, then upper- and lower-case letters
+ const ALPHA: Class = &[('A', 'Z'), ('a', 'z')]; // upper- and lower-case letters
+ const ASCII: Class = &[('\x00', '\x7F')]; // the whole 7-bit range
+ const BLANK: Class = &[(' ', ' '), ('\t', '\t')]; // space and tab only
+ const CNTRL: Class = &[('\x00', '\x1F'), ('\x7F', '\x7F')]; // 0x00-0x1F plus DEL (0x7F)
+ const DIGIT: Class = &[('0', '9')]; // decimal digits
+ const GRAPH: Class = &[('!', '~')]; // 0x21-0x7E; excludes space
+ const LOWER: Class = &[('a', 'z')]; // lower-case letters
+ const PRINT: Class = &[(' ', '~')]; // 0x20-0x7E; same as GRAPH plus space
+ const PUNCT: Class = &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')]; // GRAPH minus ALNUM
+ const SPACE: Class = &[('\t', '\t'), ('\n', '\n'), ('\x0B', '\x0B'), // tab, line feed, vertical tab,
+ ('\x0C', '\x0C'), ('\r', '\r'), (' ', ' ')]; // form feed, carriage return, space
+ const UPPER: Class = &[('A', 'Z')]; // upper-case letters
+ const WORD: Class = &[('0', '9'), ('A', 'Z'), ('a', 'z'), ('_', '_')]; // ALNUM plus '_'; note '_' is listed out of code point order
+ const XDIGIT: Class = &[('0', '9'), ('A', 'F'), ('a', 'f')]; // hex digits in either case
+
+ #[cfg(test)]
+ mod tests { // Parser tests: each case parses a pattern and compares the result with a hand-built `Expr`, or checks the reported error.
+ use { CharClass, ClassRange, Expr, Repeater, ErrorKind };
+ use unicode::regex::{PERLD, PERLS, PERLW};
+ use super::Parser;
+ use super::{LOWER, UPPER};
+ // Ranges the `\p{Yi}` tests below expect for the `Yi` Unicode class.
+ static YI: &'static [(char, char)] = &[
+ ('\u{a000}', '\u{a48c}'), ('\u{a490}', '\u{a4c6}'),
+ ];
+ // Shorthand constructors used to spell out expected syntax trees.
+ fn p(s: &str) -> Expr { Parser::parse(s).unwrap() } // parse `s`; panics (via unwrap) on a parse error
+ fn lit(c: char) -> Expr { Expr::Literal { chars: vec![c], casei: false } } // one-char literal, casei off
+ fn liti(c: char) -> Expr { Expr::Literal { chars: vec![c], casei: true } } // one-char literal, casei on
+ fn b<T>(v: T) -> Box<T> { Box::new(v) } // box a value
+ fn c(es: &[Expr]) -> Expr { Expr::Concat(es.to_vec()) } // concatenation of `es`
+ // Builds a `CharClass` from `(start, end)` pairs; a single char is written `(c, c)`.
+ fn class(ranges: &[(char, char)]) -> CharClass {
+ let ranges = ranges.iter().cloned()
+ .map(|(c1, c2)| ClassRange::new(c1, c2)).collect();
+ CharClass::new(ranges)
+ }
+ // Pools the ranges of several classes into one `CharClass` and canonicalizes it.
+ fn classes(classes: &[&[(char, char)]]) -> CharClass {
+ let mut cls = CharClass::empty();
+ for &ranges in classes {
+ cls.ranges.extend(class(ranges));
+ }
+ cls.canonicalize()
+ }
+ // ---- Empty pattern, literals and anchors ----
+ #[test]
+ fn empty() {
+ assert_eq!(p(""), Expr::Empty);
+ }
+
+ #[test]
+ fn literal() {
+ assert_eq!(p("a"), lit('a'));
+ }
+
+ #[test]
+ fn literal_string() {
+ assert_eq!(p("ab"), Expr::Concat(vec![lit('a'), lit('b')]));
+ }
+
+ #[test]
+ fn start_literal() {
+ assert_eq!(p("^a"), Expr::Concat(vec![
+ Expr::StartText,
+ Expr::Literal { chars: vec!['a'], casei: false },
+ ]));
+ }
+ // ---- Repetition: `?`, `+`, `*` and counted `{m,n}`, greedy and non-greedy ----
+ #[test]
+ fn repeat_zero_or_one_greedy() {
+ assert_eq!(p("a?"), Expr::Repeat {
+ e: b(lit('a')),
+ r: Repeater::ZeroOrOne,
+ greedy: true,
+ });
+ }
+
+ #[test]
+ fn repeat_zero_or_one_greedy_concat() {
+ assert_eq!(p("ab?"), Expr::Concat(vec![
+ lit('a'),
+ Expr::Repeat {
+ e: b(lit('b')),
+ r: Repeater::ZeroOrOne,
+ greedy: true,
+ },
+ ]));
+ }
+
+ #[test]
+ fn repeat_zero_or_one_nongreedy() {
+ assert_eq!(p("a??"), Expr::Repeat {
+ e: b(lit('a')),
+ r: Repeater::ZeroOrOne,
+ greedy: false,
+ });
+ }
+
+ #[test]
+ fn repeat_one_or_more_greedy() {
+ assert_eq!(p("a+"), Expr::Repeat {
+ e: b(lit('a')),
+ r: Repeater::OneOrMore,
+ greedy: true,
+ });
+ }
+
+ #[test]
+ fn repeat_one_or_more_nongreedy() {
+ assert_eq!(p("a+?"), Expr::Repeat {
+ e: b(lit('a')),
+ r: Repeater::OneOrMore,
+ greedy: false,
+ });
+ }
+
+ #[test]
+ fn repeat_zero_or_more_greedy() {
+ assert_eq!(p("a*"), Expr::Repeat {
+ e: b(lit('a')),
+ r: Repeater::ZeroOrMore,
+ greedy: true,
+ });
+ }
+
+ #[test]
+ fn repeat_zero_or_more_nongreedy() {
+ assert_eq!(p("a*?"), Expr::Repeat {
+ e: b(lit('a')),
+ r: Repeater::ZeroOrMore,
+ greedy: false,
+ });
+ }
+
+ #[test]
+ fn repeat_counted_exact() {
+ assert_eq!(p("a{5}"), Expr::Repeat {
+ e: b(lit('a')),
+ r: Repeater::Range { min: 5, max: Some(5) },
+ greedy: true,
+ });
+ }
+
+ #[test]
+ fn repeat_counted_min() {
+ assert_eq!(p("a{5,}"), Expr::Repeat {
+ e: b(lit('a')),
+ r: Repeater::Range { min: 5, max: None },
+ greedy: true,
+ });
+ }
+
+ #[test]
+ fn repeat_counted_min_max() {
+ assert_eq!(p("a{5,10}"), Expr::Repeat {
+ e: b(lit('a')),
+ r: Repeater::Range { min: 5, max: Some(10) },
+ greedy: true,
+ });
+ }
+
+ #[test]
+ fn repeat_counted_exact_nongreedy() {
+ assert_eq!(p("a{5}?"), Expr::Repeat {
+ e: b(lit('a')),
+ r: Repeater::Range { min: 5, max: Some(5) },
+ greedy: false,
+ });
+ }
+
+ #[test]
+ fn repeat_counted_min_nongreedy() {
+ assert_eq!(p("a{5,}?"), Expr::Repeat {
+ e: b(lit('a')),
+ r: Repeater::Range { min: 5, max: None },
+ greedy: false,
+ });
+ }
+
+ #[test]
+ fn repeat_counted_min_max_nongreedy() {
+ assert_eq!(p("a{5,10}?"), Expr::Repeat {
+ e: b(lit('a')),
+ r: Repeater::Range { min: 5, max: Some(10) },
+ greedy: false,
+ });
+ }
+
+ #[test]
+ fn repeat_counted_whitespace() {
+ assert_eq!(p("a{ 5 }"), Expr::Repeat {
+ e: b(lit('a')),
+ r: Repeater::Range { min: 5, max: Some(5) },
+ greedy: true,
+ });
+ assert_eq!(p("a{ 5 , 10 }"), Expr::Repeat {
+ e: b(lit('a')),
+ r: Repeater::Range { min: 5, max: Some(10) },
+ greedy: true,
+ });
+ }
+ // ---- Groups and alternation; capture indices count from 1 ----
+ #[test]
+ fn group_literal() {
+ assert_eq!(p("(a)"), Expr::Group {
+ e: b(lit('a')),
+ i: Some(1),
+ name: None,
+ });
+ }
+
+ #[test]
+ fn group_literal_concat() {
+ assert_eq!(p("(ab)"), Expr::Group {
+ e: b(c(&[lit('a'), lit('b')])),
+ i: Some(1),
+ name: None,
+ });
+ }
+
+ #[test]
+ fn alt_two() {
+ assert_eq!(p("a|b"), Expr::Alternate(vec![lit('a'), lit('b')]));
+ }
+
+ #[test]
+ fn alt_many() {
+ assert_eq!(p("a|b|c"), Expr::Alternate(vec![
+ lit('a'), lit('b'), lit('c'),
+ ]));
+ }
+
+ #[test]
+ fn alt_many_concat() {
+ assert_eq!(p("ab|bc|cd"), Expr::Alternate(vec![
+ c(&[lit('a'), lit('b')]),
+ c(&[lit('b'), lit('c')]),
+ c(&[lit('c'), lit('d')]),
+ ]));
+ }
+
+ #[test]
+ fn alt_group_two() {
+ assert_eq!(p("(a|b)"), Expr::Group {
+ e: b(Expr::Alternate(vec![lit('a'), lit('b')])),
+ i: Some(1),
+ name: None,
+ });
+ }
+
+ #[test]
+ fn alt_group_many() {
+ assert_eq!(p("(a|b|c)"), Expr::Group {
+ e: b(Expr::Alternate(vec![lit('a'), lit('b'), lit('c')])),
+ i: Some(1),
+ name: None,
+ });
+ }
+
+ #[test]
+ fn alt_group_many_concat() {
+ assert_eq!(p("(ab|bc|cd)"), Expr::Group {
+ e: b(Expr::Alternate(vec![
+ c(&[lit('a'), lit('b')]),
+ c(&[lit('b'), lit('c')]),
+ c(&[lit('c'), lit('d')]),
+ ])),
+ i: Some(1),
+ name: None,
+ });
+ }
+
+ #[test]
+ fn alt_group_nested() {
+ assert_eq!(p("(ab|(bc|(cd)))"), Expr::Group {
+ e: b(Expr::Alternate(vec![
+ c(&[lit('a'), lit('b')]),
+ Expr::Group {
+ e: b(Expr::Alternate(vec![
+ c(&[lit('b'), lit('c')]),
+ Expr::Group {
+ e: b(c(&[lit('c'), lit('d')])),
+ i: Some(3),
+ name: None,
+ }
+ ])),
+ i: Some(2),
+ name: None,
+ },
+ ])),
+ i: Some(1),
+ name: None,
+ });
+ }
+
+ #[test]
+ fn group_name() {
+ assert_eq!(p("(?P<foo>a)"), Expr::Group {
+ e: b(lit('a')),
+ i: Some(1),
+ name: Some("foo".into()),
+ });
+ }
+
+ #[test]
+ fn group_no_capture() {
+ assert_eq!(p("(?:a)"), Expr::Group {
+ e: b(lit('a')),
+ i: None,
+ name: None,
+ });
+ }
+
+ #[test]
+ fn group_flags() {
+ assert_eq!(p("(?i:a)"), Expr::Group {
+ e: b(liti('a')),
+ i: None,
+ name: None,
+ });
+ }
+
+ #[test]
+ fn group_flags_returned() {
+ assert_eq!(p("(?i:a)a"), c(&[
+ Expr::Group {
+ e: b(liti('a')),
+ i: None,
+ name: None,
+ },
+ lit('a'),
+ ]));
+ }
+
+ #[test]
+ fn group_flags_retained() {
+ assert_eq!(p("(?i)(?-i:a)a"), c(&[
+ Expr::Group {
+ e: b(lit('a')),
+ i: None,
+ name: None,
+ },
+ liti('a'),
+ ]));
+ }
+ // ---- Inline flags `(?flags)`; a flag set inside a group does not leak out of it ----
+ #[test]
+ fn flags_inline() {
+ assert_eq!(p("(?i)a"), liti('a'));
+ }
+
+ #[test]
+ fn flags_inline_multiple() {
+ assert_eq!(p("(?is)a."), c(&[liti('a'), Expr::AnyChar]));
+ }
+
+ #[test]
+ fn flags_inline_multiline() {
+ assert_eq!(p("(?m)^(?-m)$"), c(&[Expr::StartLine, Expr::EndText]));
+ }
+
+ #[test]
+ fn flags_inline_swap_greed() {
+ assert_eq!(p("(?U)a*a*?(?i-U)a*a*?"), c(&[
+ Expr::Repeat {
+ e: b(lit('a')),
+ r: Repeater::ZeroOrMore,
+ greedy: false,
+ },
+ Expr::Repeat {
+ e: b(lit('a')),
+ r: Repeater::ZeroOrMore,
+ greedy: true,
+ },
+ Expr::Repeat {
+ e: b(liti('a')),
+ r: Repeater::ZeroOrMore,
+ greedy: true,
+ },
+ Expr::Repeat {
+ e: b(liti('a')),
+ r: Repeater::ZeroOrMore,
+ greedy: false,
+ },
+ ]));
+ }
+
+ #[test]
+ fn flags_inline_multiple_negate_one() {
+ assert_eq!(p("(?is)a.(?i-s)a."), c(&[
+ liti('a'), Expr::AnyChar, liti('a'), Expr::AnyCharNoNL,
+ ]));
+ }
+
+ #[test]
+ fn flags_inline_negate() {
+ assert_eq!(p("(?i)a(?-i)a"), c(&[liti('a'), lit('a')]));
+ }
+
+ #[test]
+ fn flags_group_inline() {
+ assert_eq!(p("(a(?i)a)a"), c(&[
+ Expr::Group {
+ e: b(c(&[lit('a'), liti('a')])),
+ i: Some(1),
+ name: None,
+ },
+ lit('a'),
+ ]));
+ }
+
+ #[test]
+ fn flags_group_inline_retain() {
+ assert_eq!(p("(?i)((?-i)a)a"), c(&[
+ Expr::Group {
+ e: b(lit('a')),
+ i: Some(1),
+ name: None,
+ },
+ liti('a'),
+ ]));
+ }
+ // ---- Backslash escapes: control chars, boundaries, punctuation, octal and hex ----
+ #[test]
+ fn escape_simple() {
+ assert_eq!(p(r"\a\f\t\n\r\v"), c(&[
+ lit('\x07'), lit('\x0C'), lit('\t'),
+ lit('\n'), lit('\r'), lit('\x0B'),
+ ]));
+ }
+
+ #[test]
+ fn escape_boundaries() {
+ assert_eq!(p(r"\A\z\b\B"), c(&[
+ Expr::StartText, Expr::EndText,
+ Expr::WordBoundary, Expr::NotWordBoundary,
+ ]));
+ }
+
+ #[test]
+ fn escape_punctuation() {
+ assert_eq!(p(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"), c(&[
+ lit('\\'), lit('.'), lit('+'), lit('*'), lit('?'),
+ lit('('), lit(')'), lit('|'), lit('['), lit(']'),
+ lit('{'), lit('}'), lit('^'), lit('$'), lit('#'),
+ ]));
+ }
+
+ #[test]
+ fn escape_octal() {
+ assert_eq!(p(r"\123"), lit('S'));
+ assert_eq!(p(r"\1234"), c(&[lit('S'), lit('4')]));
+ }
+
+ #[test]
+ fn escape_hex2() {
+ assert_eq!(p(r"\x53"), lit('S'));
+ assert_eq!(p(r"\x534"), c(&[lit('S'), lit('4')]));
+ }
+
+ #[test]
+ fn escape_hex() {
+ assert_eq!(p(r"\x{53}"), lit('S'));
+ assert_eq!(p(r"\x{53}4"), c(&[lit('S'), lit('4')]));
+ assert_eq!(p(r"\x{2603}"), lit('\u{2603}'));
+ }
+ // ---- Unicode classes `\p{Name}` / `\pX`, with `\P` negation and case folding ----
+ #[test]
+ fn escape_unicode_name() {
+ assert_eq!(p(r"\p{Yi}"), Expr::Class(class(YI)));
+ }
+
+ #[test]
+ fn escape_unicode_letter() {
+ assert_eq!(p(r"\pZ"), Expr::Class(class(&[
+ ('\u{20}', '\u{20}'), ('\u{a0}', '\u{a0}'),
+ ('\u{1680}', '\u{1680}'), ('\u{2000}', '\u{200a}'),
+ ('\u{2028}', '\u{2029}'), ('\u{202f}', '\u{202f}'),
+ ('\u{205f}', '\u{205f}'), ('\u{3000}', '\u{3000}'),
+ ])));
+ }
+
+ #[test]
+ fn escape_unicode_name_case_fold() {
+ assert_eq!(p(r"(?i)\p{Yi}"), Expr::Class(class(YI).case_fold()));
+ }
+
+ #[test]
+ fn escape_unicode_letter_case_fold() {
+ assert_eq!(p(r"(?i)\pZ"), Expr::Class(class(&[
+ ('\u{20}', '\u{20}'), ('\u{a0}', '\u{a0}'),
+ ('\u{1680}', '\u{1680}'), ('\u{2000}', '\u{200a}'),
+ ('\u{2028}', '\u{2029}'), ('\u{202f}', '\u{202f}'),
+ ('\u{205f}', '\u{205f}'), ('\u{3000}', '\u{3000}'),
+ ]).case_fold()));
+ }
+
+ #[test]
+ fn escape_unicode_name_negate() {
+ assert_eq!(p(r"\P{Yi}"), Expr::Class(class(YI).negate()));
+ }
+
+ #[test]
+ fn escape_unicode_letter_negate() {
+ assert_eq!(p(r"\PZ"), Expr::Class(class(&[
+ ('\u{20}', '\u{20}'), ('\u{a0}', '\u{a0}'),
+ ('\u{1680}', '\u{1680}'), ('\u{2000}', '\u{200a}'),
+ ('\u{2028}', '\u{2029}'), ('\u{202f}', '\u{202f}'),
+ ('\u{205f}', '\u{205f}'), ('\u{3000}', '\u{3000}'),
+ ]).negate()));
+ }
+
+ #[test]
+ fn escape_unicode_name_negate_case_fold() {
+ assert_eq!(p(r"(?i)\P{Yi}"),
+ Expr::Class(class(YI).negate().case_fold()));
+ }
+
+ #[test]
+ fn escape_unicode_letter_negate_case_fold() {
+ assert_eq!(p(r"(?i)\PZ"), Expr::Class(class(&[
+ ('\u{20}', '\u{20}'), ('\u{a0}', '\u{a0}'),
+ ('\u{1680}', '\u{1680}'), ('\u{2000}', '\u{200a}'),
+ ('\u{2028}', '\u{2029}'), ('\u{202f}', '\u{202f}'),
+ ('\u{205f}', '\u{205f}'), ('\u{3000}', '\u{3000}'),
+ ]).negate().case_fold()));
+ }
+ // ---- Perl classes `\d`, `\s`, `\w` and their upper-case negations ----
+ #[test]
+ fn escape_perl_d() {
+ assert_eq!(p(r"\d"), Expr::Class(class(PERLD)));
+ }
+
+ #[test]
+ fn escape_perl_s() {
+ assert_eq!(p(r"\s"), Expr::Class(class(PERLS)));
+ }
+
+ #[test]
+ fn escape_perl_w() {
+ assert_eq!(p(r"\w"), Expr::Class(class(PERLW)));
+ }
+
+ #[test]
+ fn escape_perl_d_negate() {
+ assert_eq!(p(r"\D"), Expr::Class(class(PERLD).negate()));
+ }
+
+ #[test]
+ fn escape_perl_s_negate() {
+ assert_eq!(p(r"\S"), Expr::Class(class(PERLS).negate()));
+ }
+
+ #[test]
+ fn escape_perl_w_negate() {
+ assert_eq!(p(r"\W"), Expr::Class(class(PERLW).negate()));
+ }
+
+ #[test]
+ fn escape_perl_d_case_fold() {
+ assert_eq!(p(r"(?i)\d"), Expr::Class(class(PERLD).case_fold()));
+ }
+
+ #[test]
+ fn escape_perl_s_case_fold() {
+ assert_eq!(p(r"(?i)\s"), Expr::Class(class(PERLS).case_fold()));
+ }
+
+ #[test]
+ fn escape_perl_w_case_fold() {
+ assert_eq!(p(r"(?i)\w"), Expr::Class(class(PERLW).case_fold()));
+ }
+
+ #[test]
+ fn escape_perl_d_case_fold_negate() {
+ assert_eq!(p(r"(?i)\D"),
+ Expr::Class(class(PERLD).negate().case_fold()));
+ }
+
+ #[test]
+ fn escape_perl_s_case_fold_negate() {
+ assert_eq!(p(r"(?i)\S"),
+ Expr::Class(class(PERLS).negate().case_fold()));
+ }
+
+ #[test]
+ fn escape_perl_w_case_fold_negate() {
+ assert_eq!(p(r"(?i)\W"),
+ Expr::Class(class(PERLW).negate().case_fold()));
+ }
+ // ---- Bracketed classes `[...]`: singletons, negation, nested escapes, brackets, overlaps ----
+ #[test]
+ fn class_singleton() {
+ assert_eq!(p(r"[a]"), Expr::Class(class(&[('a', 'a')])));
+ assert_eq!(p(r"[\x00]"), Expr::Class(class(&[('\x00', '\x00')])));
+ assert_eq!(p(r"[\n]"), Expr::Class(class(&[('\n', '\n')])));
+ assert_eq!(p("[\n]"), Expr::Class(class(&[('\n', '\n')])));
+ }
+
+ #[test]
+ fn class_singleton_negate() {
+ assert_eq!(p(r"[^a]"), Expr::Class(class(&[
+ ('\x00', '\x60'), ('\x62', '\u{10FFFF}'),
+ ])));
+ assert_eq!(p(r"[^\x00]"), Expr::Class(class(&[
+ ('\x01', '\u{10FFFF}'),
+ ])));
+ assert_eq!(p(r"[^\n]"), Expr::Class(class(&[
+ ('\x00', '\x09'), ('\x0b', '\u{10FFFF}'),
+ ])));
+ assert_eq!(p("[^\n]"), Expr::Class(class(&[
+ ('\x00', '\x09'), ('\x0b', '\u{10FFFF}'),
+ ])));
+ }
+
+ #[test]
+ fn class_singleton_class() {
+ assert_eq!(p(r"[\d]"), Expr::Class(class(PERLD)));
+ assert_eq!(p(r"[\p{Yi}]"), Expr::Class(class(YI)));
+ }
+
+ #[test]
+ fn class_singleton_class_negate() {
+ assert_eq!(p(r"[^\d]"), Expr::Class(class(PERLD).negate()));
+ assert_eq!(p(r"[^\w]"), Expr::Class(class(PERLW).negate()));
+ assert_eq!(p(r"[^\s]"), Expr::Class(class(PERLS).negate()));
+ }
+
+ #[test]
+ fn class_singleton_class_negate_negate() {
+ assert_eq!(p(r"[^\D]"), Expr::Class(class(PERLD)));
+ assert_eq!(p(r"[^\W]"), Expr::Class(class(PERLW)));
+ assert_eq!(p(r"[^\S]"), Expr::Class(class(PERLS)));
+ }
+
+ #[test]
+ fn class_singleton_class_casei() {
+ assert_eq!(p(r"(?i)[\d]"), Expr::Class(class(PERLD).case_fold()));
+ assert_eq!(p(r"(?i)[\p{Yi}]"), Expr::Class(class(YI).case_fold()));
+ }
+
+ #[test]
+ fn class_singleton_class_negate_casei() {
+ assert_eq!(p(r"(?i)[^\d]"),
+ Expr::Class(class(PERLD).negate().case_fold()));
+ assert_eq!(p(r"(?i)[^\w]"),
+ Expr::Class(class(PERLW).negate().case_fold()));
+ assert_eq!(p(r"(?i)[^\s]"),
+ Expr::Class(class(PERLS).negate().case_fold()));
+ }
+
+ #[test]
+ fn class_singleton_class_negate_negate_casei() {
+ assert_eq!(p(r"(?i)[^\D]"), Expr::Class(class(PERLD).case_fold()));
+ assert_eq!(p(r"(?i)[^\W]"), Expr::Class(class(PERLW).case_fold()));
+ assert_eq!(p(r"(?i)[^\S]"), Expr::Class(class(PERLS).case_fold()));
+ }
+
+ #[test]
+ fn class_multiple_class() {
+ assert_eq!(p(r"[\d\p{Yi}]"), Expr::Class(classes(&[
+ PERLD, YI,
+ ])));
+ }
+
+ #[test]
+ fn class_multiple_class_negate() {
+ assert_eq!(p(r"[^\d\p{Yi}]"), Expr::Class(classes(&[
+ PERLD, YI,
+ ]).negate()));
+ }
+
+ #[test]
+ fn class_multiple_class_negate_negate() {
+ let nperld = class(PERLD).negate();
+ let nyi = class(YI).negate();
+ let cls = CharClass::empty().merge(nperld).merge(nyi);
+ assert_eq!(p(r"[^\D\P{Yi}]"), Expr::Class(cls.negate()));
+ }
+
+ #[test]
+ fn class_multiple_class_casei() {
+ assert_eq!(p(r"(?i)[\d\p{Yi}]"), Expr::Class(classes(&[
+ PERLD, YI,
+ ]).case_fold()));
+ }
+
+ #[test]
+ fn class_multiple_class_negate_casei() {
+ assert_eq!(p(r"(?i)[^\d\p{Yi}]"), Expr::Class(classes(&[
+ PERLD, YI,
+ ]).negate().case_fold()));
+ }
+
+ #[test]
+ fn class_multiple_class_negate_negate_casei() {
+ let nperld = class(PERLD).negate();
+ let nyi = class(YI).negate();
+ let class = CharClass::empty().merge(nperld).merge(nyi);
+ assert_eq!(p(r"(?i)[^\D\P{Yi}]"),
+ Expr::Class(class.negate().case_fold()));
+ }
+
+ #[test]
+ fn class_class_hypen() {
+ assert_eq!(p(r"[\p{Yi}-]"), Expr::Class(classes(&[
+ &[('-', '-')], YI,
+ ])));
+ assert_eq!(p(r"[\p{Yi}-a]"), Expr::Class(classes(&[
+ &[('-', '-')], &[('a', 'a')], YI,
+ ])));
+ }
+
+ #[test]
+ fn class_brackets() {
+ assert_eq!(p("[]]"), Expr::Class(class(&[(']', ']')])));
+ assert_eq!(p("[][]"), Expr::Class(class(&[('[', '['), (']', ']')])));
+ assert_eq!(p("[[]]"), Expr::Concat(vec![
+ Expr::Class(class(&[('[', '[')])),
+ lit(']'),
+ ]));
+ }
+
+ #[test]
+ fn class_brackets_hypen() {
+ assert_eq!(p("[]-]"), Expr::Class(class(&[('-', '-'), (']', ']')])));
+ assert_eq!(p("[-]]"), Expr::Concat(vec![
+ Expr::Class(class(&[('-', '-')])),
+ lit(']'),
+ ]));
+ }
+
+ #[test]
+ fn class_overlapping() {
+ assert_eq!(p("[a-fd-h]"), Expr::Class(class(&[('a', 'h')])));
+ assert_eq!(p("[a-fg-m]"), Expr::Class(class(&[('a', 'm')])));
+ }
+ // ---- Named ASCII classes `[:name:]`, including `[:^name:]` negation ----
+ #[test]
+ fn ascii_class() {
+ assert_eq!(p("[:upper:]"), Expr::Class(class(UPPER)));
+ assert_eq!(p("[[:upper:]]"), Expr::Class(class(UPPER)));
+ }
+
+ #[test]
+ fn ascii_class_not() {
+ assert_eq!(p("[:abc:]"),
+ Expr::Class(class(&[(':', ':'), ('a', 'c')])));
+ }
+
+ #[test]
+ fn ascii_class_multiple() {
+ assert_eq!(p("[[:lower:][:upper:]]"),
+ Expr::Class(classes(&[UPPER, LOWER])));
+ }
+
+ #[test]
+ fn ascii_class_negate() {
+ assert_eq!(p("[[:^upper:]]"), Expr::Class(class(UPPER).negate()));
+ assert_eq!(p("[^[:^upper:]]"), Expr::Class(class(UPPER)));
+ }
+
+ #[test]
+ fn ascii_class_negate_multiple() {
+ let (nlower, nupper) = (class(LOWER).negate(), class(UPPER).negate());
+ let cls = CharClass::empty().merge(nlower).merge(nupper);
+ assert_eq!(p("[[:^lower:][:^upper:]]"), Expr::Class(cls.clone()));
+ assert_eq!(p("[^[:^lower:][:^upper:]]"), Expr::Class(cls.negate()));
+ }
+
+ #[test]
+ fn ascii_class_case_fold() {
+ assert_eq!(p("(?i)[:upper:]"), Expr::Class(class(UPPER).case_fold()));
+ assert_eq!(p("(?i)[[:upper:]]"),
+ Expr::Class(class(UPPER).case_fold()));
+ }
+
+ #[test]
+ fn ascii_class_negate_case_fold() {
+ assert_eq!(p("(?i)[[:^upper:]]"),
+ Expr::Class(class(UPPER).negate().case_fold()));
+ assert_eq!(p("(?i)[^[:^upper:]]"),
+ Expr::Class(class(UPPER).case_fold()));
+ }
+ // ---- `x` flag: whitespace and `#` comments in the pattern are skipped ----
+ #[test]
+ fn ignore_space_literal() {
+ assert_eq!(p("(?x) a b c"), Expr::Concat(vec![
+ lit('a'), lit('b'), lit('c'),
+ ]));
+ }
+
+ #[test]
+ fn ignore_space_literal_off() {
+ assert_eq!(p("(?x) a b c(?-x) a"), Expr::Concat(vec![
+ lit('a'), lit('b'), lit('c'), lit(' '), lit('a'),
+ ]));
+ }
+
+ #[test]
+ fn ignore_space_class() {
+ assert_eq!(p("(?x)[a
+ - z
+]"), Expr::Class(class(&[('a', 'z')])));
+ assert_eq!(p("(?x)[ ^ a
+ - z
+]"), Expr::Class(class(&[('a', 'z')]).negate()));
+ }
+
+ #[test]
+ fn ignore_space_escape() {
+ assert_eq!(p(r"(?x)\ d"), Expr::Class(class(PERLD)));
+ assert_eq!(p(r"(?x)\
+ D"), Expr::Class(class(PERLD).negate()));
+ }
+
+ #[test]
+ fn ignore_space_comments() {
+ assert_eq!(p(r"(?x)(?P<foo>
+ a # comment 1
+)(?P<bar>
+ z # comment 2
+)"), Expr::Concat(vec![
+ Expr::Group {
+ e: Box::new(lit('a')),
+ i: Some(1),
+ name: Some("foo".into()),
+ },
+ Expr::Group {
+ e: Box::new(lit('z')),
+ i: Some(2),
+ name: Some("bar".into()),
+ },
+ ]));
+ }
+
+ #[test]
+ fn ignore_space_comments_re_enable() {
+ assert_eq!(p(r"(?x)a # hi
+(?-x:#) # sweet"), Expr::Concat(vec![
+ lit('a'),
+ Expr::Group {
+ e: Box::new(lit('#')),
+ i: None,
+ name: None,
+ },
+ ]));
+ }
+
+ #[test]
+ fn ignore_space_escape_punctuation() {
+ assert_eq!(p(r"(?x)\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"), c(&[
+ lit('\\'), lit('.'), lit('+'), lit('*'), lit('?'),
+ lit('('), lit(')'), lit('|'), lit('['), lit(']'),
+ lit('{'), lit('}'), lit('^'), lit('$'), lit('#'),
+ ]));
+ }
+
+ #[test]
+ fn ignore_space_escape_hash() {
+ assert_eq!(p(r"(?x)a\# # hi there"), Expr::Concat(vec![
+ lit('a'),
+ lit('#'),
+ ]));
+ }
+
+ // Parses `$re`, which must fail, then checks the error's `pos` and `kind`
+ // and that the `surround` text it carries occurs somewhere in the pattern.
+ macro_rules! test_err {
+ ($re:expr, $pos:expr, $kind:expr) => {{
+ let err = Parser::parse($re).unwrap_err();
+ assert_eq!($pos, err.pos);
+ assert_eq!($kind, err.kind);
+ assert!($re.contains(&err.surround));
+ }}
+ }
+ // ---- Error cases: expected `pos` and `ErrorKind` for malformed patterns ----
+ #[test]
+ fn error_repeat_no_expr_simple() {
+ test_err!("(*", 1, ErrorKind::RepeaterExpectsExpr);
+ }
+
+ #[test]
+ fn error_repeat_no_expr_counted() {
+ test_err!("({5}", 1, ErrorKind::RepeaterExpectsExpr);
+ }
+
+ #[test]
+ fn error_repeat_beginning_counted() {
+ test_err!("{5}", 0, ErrorKind::RepeaterExpectsExpr);
+ }
+
+ #[test]
+ fn error_repeat_illegal_exprs_simple() {
+ test_err!("a**", 2, ErrorKind::RepeaterUnexpectedExpr(Expr::Repeat {
+ e: b(lit('a')),
+ r: Repeater::ZeroOrMore,
+ greedy: true,
+ }));
+ test_err!("a|*", 2,
+ ErrorKind::RepeaterUnexpectedExpr(Expr::Alternate(vec![lit('a')]))
+ );
+ }
+
+ #[test]
+ fn error_repeat_illegal_exprs_counted() {
+ test_err!("a*{5}", 2, ErrorKind::RepeaterUnexpectedExpr(Expr::Repeat {
+ e: b(lit('a')),
+ r: Repeater::ZeroOrMore,
+ greedy: true,
+ }));
+ test_err!("a|{5}", 2,
+ ErrorKind::RepeaterUnexpectedExpr(Expr::Alternate(vec![lit('a')]))
+ );
+ }
+
+ #[test]
+ fn error_repeat_empty_number() {
+ test_err!("a{}", 2, ErrorKind::MissingBase10);
+ }
+
+ #[test]
+ fn error_repeat_eof() {
+ test_err!("a{5", 3, ErrorKind::UnclosedRepeat);
+ }
+
+ #[test]
+ fn error_repeat_empty_number_eof() {
+ test_err!("a{xyz", 5, ErrorKind::InvalidBase10("xyz".into()));
+ test_err!("a{12,xyz", 8, ErrorKind::InvalidBase10("xyz".into()));
+ }
+
+ #[test]
+ fn error_repeat_invalid_number() {
+ test_err!("a{9999999999}", 12,
+ ErrorKind::InvalidBase10("9999999999".into()));
+ test_err!("a{1,9999999999}", 14,
+ ErrorKind::InvalidBase10("9999999999".into()));
+ }
+
+ #[test]
+ fn error_repeat_invalid_number_extra() {
+ test_err!("a{12x}", 5, ErrorKind::InvalidBase10("12x".into()));
+ test_err!("a{1,12x}", 7, ErrorKind::InvalidBase10("12x".into()));
+ }
+
+ #[test]
+ fn error_repeat_invalid_range() {
+ test_err!("a{2,1}", 5,
+ ErrorKind::InvalidRepeatRange { min: 2, max: 1 });
+ }
+
+ #[test]
+ fn error_alternate_empty() {
+ test_err!("|a", 0, ErrorKind::EmptyAlternate);
+ }
+
+ #[test]
+ fn error_alternate_empty_with_group() {
+ test_err!("(|a)", 1, ErrorKind::EmptyAlternate);
+ }
+
+ #[test]
+ fn error_alternate_empty_with_alternate() {
+ test_err!("a||", 2, ErrorKind::EmptyAlternate);
+ }
+
+ #[test]
+ fn error_close_paren_unopened_empty() {
+ test_err!(")", 0, ErrorKind::UnopenedParen);
+ }
+
+ #[test]
+ fn error_close_paren_unopened() {
+ test_err!("ab)", 2, ErrorKind::UnopenedParen);
+ }
+
+ #[test]
+ fn error_close_paren_unopened_with_alt() {
+ test_err!("a|b)", 3, ErrorKind::UnopenedParen);
+ }
+
+ #[test]
+ fn error_close_paren_empty_alt() {
+ test_err!("(a|)", 3, ErrorKind::EmptyAlternate);
+ }
+
+ #[test]
+ fn error_close_paren_empty_group() {
+ test_err!("()", 1, ErrorKind::EmptyGroup);
+ }
+
+ #[test]
+ fn error_close_paren_empty_group_with_name() {
+ test_err!("(?P<foo>)", 8, ErrorKind::EmptyGroup);
+ }
+
+ #[test]
+ fn error_finish_concat_unclosed() {
+ test_err!("ab(xy", 2, ErrorKind::UnclosedParen);
+ }
+
+ #[test]
+ fn error_finish_concat_empty_alt() {
+ test_err!("a|", 2, ErrorKind::EmptyAlternate);
+ }
+
+ #[test]
+ fn error_group_name_invalid() {
+ test_err!("(?P<a#>x)", 6, ErrorKind::InvalidCaptureName("a#".into()));
+ }
+
+ #[test]
+ fn error_group_name_invalid_leading() {
+ test_err!("(?P<1a>a)", 6, ErrorKind::InvalidCaptureName("1a".into()));
+ }
+
+ #[test]
+ fn error_group_name_unexpected_eof() {
+ test_err!("(?P<a", 5, ErrorKind::UnclosedCaptureName("a".into()));
+ }
+
+ #[test]
+ fn error_group_name_empty() {
+ test_err!("(?P<>a)", 4, ErrorKind::EmptyCaptureName);
+ }
+
+ #[test]
+ fn error_group_opts_unrecognized_flag() {
+ test_err!("(?z:a)", 2, ErrorKind::UnrecognizedFlag('z'));
+ }
+
+ #[test]
+ fn error_group_opts_unexpected_eof() {
+ test_err!("(?i", 3, ErrorKind::UnexpectedFlagEof);
+ }
+
+ #[test]
+ fn error_group_opts_double_negation() {
+ test_err!("(?-i-s:a)", 4, ErrorKind::DoubleFlagNegation);
+ }
+
+ #[test]
+ fn error_group_opts_empty_negation() {
+ test_err!("(?i-:a)", 4, ErrorKind::EmptyFlagNegation);
+ }
+
+ #[test]
+ fn error_group_opts_empty() {
+ test_err!("(?)", 2, ErrorKind::EmptyFlagNegation);
+ }
+
+ #[test]
+ fn error_escape_unexpected_eof() {
+ test_err!(r"\", 1, ErrorKind::UnexpectedEscapeEof);
+ }
+
+ #[test]
+ fn error_escape_unrecognized() {
+ test_err!(r"\m", 1, ErrorKind::UnrecognizedEscape('m'));
+ }
+
+ #[test]
+ fn error_escape_hex2_eof0() {
+ test_err!(r"\x", 2, ErrorKind::UnexpectedTwoDigitHexEof);
+ }
+
+ #[test]
+ fn error_escape_hex2_eof1() {
+ test_err!(r"\xA", 3, ErrorKind::UnexpectedTwoDigitHexEof);
+ }
+
+ #[test]
+ fn error_escape_hex2_invalid() {
+ test_err!(r"\xAG", 4, ErrorKind::InvalidBase16("AG".into()));
+ }
+
+ #[test]
+ fn error_escape_hex_eof0() {
+ test_err!(r"\x{", 3, ErrorKind::InvalidBase16("".into()));
+ }
+
+ #[test]
+ fn error_escape_hex_eof1() {
+ test_err!(r"\x{A", 4, ErrorKind::UnclosedHex);
+ }
+
+ #[test]
+ fn error_escape_hex_invalid() {
+ test_err!(r"\x{AG}", 5, ErrorKind::InvalidBase16("AG".into()));
+ }
+
+ #[test]
+ fn error_escape_hex_invalid_scalar_value_surrogate() {
+ test_err!(r"\x{D800}", 7, ErrorKind::InvalidScalarValue(0xD800));
+ }
+
+ #[test]
+ fn error_escape_hex_invalid_scalar_value_high() {
+ test_err!(r"\x{110000}", 9, ErrorKind::InvalidScalarValue(0x110000));
+ }
+
+ #[test]
+ fn error_escape_hex_invalid_u32() {
+ test_err!(r"\x{9999999999}", 13,
+ ErrorKind::InvalidBase16("9999999999".into()));
+ }
+
+ #[test]
+ fn error_unicode_unclosed() {
+ test_err!(r"\p{", 3, ErrorKind::UnclosedUnicodeName);
+ test_err!(r"\p{Greek", 8, ErrorKind::UnclosedUnicodeName);
+ }
+
+ #[test]
+ fn error_unicode_no_letter() {
+ test_err!(r"\p", 2, ErrorKind::UnexpectedEscapeEof);
+ }
+
+ #[test]
+ fn error_unicode_unknown_letter() {
+ test_err!(r"\pA", 3, ErrorKind::UnrecognizedUnicodeClass("A".into()));
+ }
+
+ #[test]
+ fn error_unicode_unknown_name() {
+ test_err!(r"\p{Yii}", 7,
+ ErrorKind::UnrecognizedUnicodeClass("Yii".into()));
+ }
+
+ #[test]
+ fn error_class_eof_empty() {
+ test_err!("[", 1, ErrorKind::UnexpectedClassEof);
+ test_err!("[^", 2, ErrorKind::UnexpectedClassEof);
+ }
+
+ #[test]
+ fn error_class_eof_non_empty() {
+ test_err!("[a", 2, ErrorKind::UnexpectedClassEof);
+ test_err!("[^a", 3, ErrorKind::UnexpectedClassEof);
+ }
+
+ #[test]
+ fn error_class_eof_range() {
+ test_err!("[a-", 3, ErrorKind::UnexpectedClassEof);
+ test_err!("[^a-", 4, ErrorKind::UnexpectedClassEof);
+ test_err!("[---", 4, ErrorKind::UnexpectedClassEof);
+ }
+
+ #[test]
+ fn error_class_invalid_escape() {
+ test_err!(r"[\pA]", 4,
+ ErrorKind::UnrecognizedUnicodeClass("A".into()));
+ }
+
+ #[test]
+ fn error_class_valid_escape_not_allowed() {
+ test_err!(r"[\A]", 3, ErrorKind::InvalidClassEscape(Expr::StartText));
+ }
+
+ #[test]
+ fn error_class_range_valid_escape_not_allowed() {
+ test_err!(r"[a-\d]", 5,
+ ErrorKind::InvalidClassEscape(Expr::Class(class(PERLD))));
+ test_err!(r"[a-\A]", 5,
+ ErrorKind::InvalidClassEscape(Expr::StartText));
+ test_err!(r"[\A-a]", 3,
+ ErrorKind::InvalidClassEscape(Expr::StartText));
+ }
+
+ #[test]
+ fn error_class_invalid_range() {
+ test_err!("[z-a]", 4, ErrorKind::InvalidClassRange {
+ start: 'z',
+ end: 'a',
+ });
+ }
+
+ #[test]
+ fn error_class_empty_range() {
+ test_err!("[]", 2, ErrorKind::UnexpectedClassEof);
+ test_err!("[^]", 3, ErrorKind::UnexpectedClassEof);
+ }
+
+ #[test]
+ fn error_duplicate_capture_name() {
+ test_err!("(?P<a>.)(?P<a>.)", 14,
+ ErrorKind::DuplicateCaptureName("a".into()));
+ }
+}
+
+
+