From 64106c4d3d4ddba8c7bc2af75376e6d3d3d75601 Mon Sep 17 00:00:00 2001 From: Date: Mon, 29 Jun 2015 20:16:15 +0000 Subject: Update documentation --- src/regex_syntax/lib.rs.html | 2465 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 2465 insertions(+) create mode 100644 src/regex_syntax/lib.rs.html (limited to 'src/regex_syntax/lib.rs.html') diff --git a/src/regex_syntax/lib.rs.html b/src/regex_syntax/lib.rs.html new file mode 100644 index 0000000..7c8b82b --- /dev/null +++ b/src/regex_syntax/lib.rs.html @@ -0,0 +1,2465 @@ + + + + + + + + + + lib.rs.html -- source + + + + + + + + + + + + + + + +
   1
+   2
+   3
+   4
+   5
+   6
+   7
+   8
+   9
+  10
+  11
+  12
+  13
+  14
+  15
+  16
+  17
+  18
+  19
+  20
+  21
+  22
+  23
+  24
+  25
+  26
+  27
+  28
+  29
+  30
+  31
+  32
+  33
+  34
+  35
+  36
+  37
+  38
+  39
+  40
+  41
+  42
+  43
+  44
+  45
+  46
+  47
+  48
+  49
+  50
+  51
+  52
+  53
+  54
+  55
+  56
+  57
+  58
+  59
+  60
+  61
+  62
+  63
+  64
+  65
+  66
+  67
+  68
+  69
+  70
+  71
+  72
+  73
+  74
+  75
+  76
+  77
+  78
+  79
+  80
+  81
+  82
+  83
+  84
+  85
+  86
+  87
+  88
+  89
+  90
+  91
+  92
+  93
+  94
+  95
+  96
+  97
+  98
+  99
+ 100
+ 101
+ 102
+ 103
+ 104
+ 105
+ 106
+ 107
+ 108
+ 109
+ 110
+ 111
+ 112
+ 113
+ 114
+ 115
+ 116
+ 117
+ 118
+ 119
+ 120
+ 121
+ 122
+ 123
+ 124
+ 125
+ 126
+ 127
+ 128
+ 129
+ 130
+ 131
+ 132
+ 133
+ 134
+ 135
+ 136
+ 137
+ 138
+ 139
+ 140
+ 141
+ 142
+ 143
+ 144
+ 145
+ 146
+ 147
+ 148
+ 149
+ 150
+ 151
+ 152
+ 153
+ 154
+ 155
+ 156
+ 157
+ 158
+ 159
+ 160
+ 161
+ 162
+ 163
+ 164
+ 165
+ 166
+ 167
+ 168
+ 169
+ 170
+ 171
+ 172
+ 173
+ 174
+ 175
+ 176
+ 177
+ 178
+ 179
+ 180
+ 181
+ 182
+ 183
+ 184
+ 185
+ 186
+ 187
+ 188
+ 189
+ 190
+ 191
+ 192
+ 193
+ 194
+ 195
+ 196
+ 197
+ 198
+ 199
+ 200
+ 201
+ 202
+ 203
+ 204
+ 205
+ 206
+ 207
+ 208
+ 209
+ 210
+ 211
+ 212
+ 213
+ 214
+ 215
+ 216
+ 217
+ 218
+ 219
+ 220
+ 221
+ 222
+ 223
+ 224
+ 225
+ 226
+ 227
+ 228
+ 229
+ 230
+ 231
+ 232
+ 233
+ 234
+ 235
+ 236
+ 237
+ 238
+ 239
+ 240
+ 241
+ 242
+ 243
+ 244
+ 245
+ 246
+ 247
+ 248
+ 249
+ 250
+ 251
+ 252
+ 253
+ 254
+ 255
+ 256
+ 257
+ 258
+ 259
+ 260
+ 261
+ 262
+ 263
+ 264
+ 265
+ 266
+ 267
+ 268
+ 269
+ 270
+ 271
+ 272
+ 273
+ 274
+ 275
+ 276
+ 277
+ 278
+ 279
+ 280
+ 281
+ 282
+ 283
+ 284
+ 285
+ 286
+ 287
+ 288
+ 289
+ 290
+ 291
+ 292
+ 293
+ 294
+ 295
+ 296
+ 297
+ 298
+ 299
+ 300
+ 301
+ 302
+ 303
+ 304
+ 305
+ 306
+ 307
+ 308
+ 309
+ 310
+ 311
+ 312
+ 313
+ 314
+ 315
+ 316
+ 317
+ 318
+ 319
+ 320
+ 321
+ 322
+ 323
+ 324
+ 325
+ 326
+ 327
+ 328
+ 329
+ 330
+ 331
+ 332
+ 333
+ 334
+ 335
+ 336
+ 337
+ 338
+ 339
+ 340
+ 341
+ 342
+ 343
+ 344
+ 345
+ 346
+ 347
+ 348
+ 349
+ 350
+ 351
+ 352
+ 353
+ 354
+ 355
+ 356
+ 357
+ 358
+ 359
+ 360
+ 361
+ 362
+ 363
+ 364
+ 365
+ 366
+ 367
+ 368
+ 369
+ 370
+ 371
+ 372
+ 373
+ 374
+ 375
+ 376
+ 377
+ 378
+ 379
+ 380
+ 381
+ 382
+ 383
+ 384
+ 385
+ 386
+ 387
+ 388
+ 389
+ 390
+ 391
+ 392
+ 393
+ 394
+ 395
+ 396
+ 397
+ 398
+ 399
+ 400
+ 401
+ 402
+ 403
+ 404
+ 405
+ 406
+ 407
+ 408
+ 409
+ 410
+ 411
+ 412
+ 413
+ 414
+ 415
+ 416
+ 417
+ 418
+ 419
+ 420
+ 421
+ 422
+ 423
+ 424
+ 425
+ 426
+ 427
+ 428
+ 429
+ 430
+ 431
+ 432
+ 433
+ 434
+ 435
+ 436
+ 437
+ 438
+ 439
+ 440
+ 441
+ 442
+ 443
+ 444
+ 445
+ 446
+ 447
+ 448
+ 449
+ 450
+ 451
+ 452
+ 453
+ 454
+ 455
+ 456
+ 457
+ 458
+ 459
+ 460
+ 461
+ 462
+ 463
+ 464
+ 465
+ 466
+ 467
+ 468
+ 469
+ 470
+ 471
+ 472
+ 473
+ 474
+ 475
+ 476
+ 477
+ 478
+ 479
+ 480
+ 481
+ 482
+ 483
+ 484
+ 485
+ 486
+ 487
+ 488
+ 489
+ 490
+ 491
+ 492
+ 493
+ 494
+ 495
+ 496
+ 497
+ 498
+ 499
+ 500
+ 501
+ 502
+ 503
+ 504
+ 505
+ 506
+ 507
+ 508
+ 509
+ 510
+ 511
+ 512
+ 513
+ 514
+ 515
+ 516
+ 517
+ 518
+ 519
+ 520
+ 521
+ 522
+ 523
+ 524
+ 525
+ 526
+ 527
+ 528
+ 529
+ 530
+ 531
+ 532
+ 533
+ 534
+ 535
+ 536
+ 537
+ 538
+ 539
+ 540
+ 541
+ 542
+ 543
+ 544
+ 545
+ 546
+ 547
+ 548
+ 549
+ 550
+ 551
+ 552
+ 553
+ 554
+ 555
+ 556
+ 557
+ 558
+ 559
+ 560
+ 561
+ 562
+ 563
+ 564
+ 565
+ 566
+ 567
+ 568
+ 569
+ 570
+ 571
+ 572
+ 573
+ 574
+ 575
+ 576
+ 577
+ 578
+ 579
+ 580
+ 581
+ 582
+ 583
+ 584
+ 585
+ 586
+ 587
+ 588
+ 589
+ 590
+ 591
+ 592
+ 593
+ 594
+ 595
+ 596
+ 597
+ 598
+ 599
+ 600
+ 601
+ 602
+ 603
+ 604
+ 605
+ 606
+ 607
+ 608
+ 609
+ 610
+ 611
+ 612
+ 613
+ 614
+ 615
+ 616
+ 617
+ 618
+ 619
+ 620
+ 621
+ 622
+ 623
+ 624
+ 625
+ 626
+ 627
+ 628
+ 629
+ 630
+ 631
+ 632
+ 633
+ 634
+ 635
+ 636
+ 637
+ 638
+ 639
+ 640
+ 641
+ 642
+ 643
+ 644
+ 645
+ 646
+ 647
+ 648
+ 649
+ 650
+ 651
+ 652
+ 653
+ 654
+ 655
+ 656
+ 657
+ 658
+ 659
+ 660
+ 661
+ 662
+ 663
+ 664
+ 665
+ 666
+ 667
+ 668
+ 669
+ 670
+ 671
+ 672
+ 673
+ 674
+ 675
+ 676
+ 677
+ 678
+ 679
+ 680
+ 681
+ 682
+ 683
+ 684
+ 685
+ 686
+ 687
+ 688
+ 689
+ 690
+ 691
+ 692
+ 693
+ 694
+ 695
+ 696
+ 697
+ 698
+ 699
+ 700
+ 701
+ 702
+ 703
+ 704
+ 705
+ 706
+ 707
+ 708
+ 709
+ 710
+ 711
+ 712
+ 713
+ 714
+ 715
+ 716
+ 717
+ 718
+ 719
+ 720
+ 721
+ 722
+ 723
+ 724
+ 725
+ 726
+ 727
+ 728
+ 729
+ 730
+ 731
+ 732
+ 733
+ 734
+ 735
+ 736
+ 737
+ 738
+ 739
+ 740
+ 741
+ 742
+ 743
+ 744
+ 745
+ 746
+ 747
+ 748
+ 749
+ 750
+ 751
+ 752
+ 753
+ 754
+ 755
+ 756
+ 757
+ 758
+ 759
+ 760
+ 761
+ 762
+ 763
+ 764
+ 765
+ 766
+ 767
+ 768
+ 769
+ 770
+ 771
+ 772
+ 773
+ 774
+ 775
+ 776
+ 777
+ 778
+ 779
+ 780
+ 781
+ 782
+ 783
+ 784
+ 785
+ 786
+ 787
+ 788
+ 789
+ 790
+ 791
+ 792
+ 793
+ 794
+ 795
+ 796
+ 797
+ 798
+ 799
+ 800
+ 801
+ 802
+ 803
+ 804
+ 805
+ 806
+ 807
+ 808
+ 809
+ 810
+ 811
+ 812
+ 813
+ 814
+ 815
+ 816
+ 817
+ 818
+ 819
+ 820
+ 821
+ 822
+ 823
+ 824
+ 825
+ 826
+ 827
+ 828
+ 829
+ 830
+ 831
+ 832
+ 833
+ 834
+ 835
+ 836
+ 837
+ 838
+ 839
+ 840
+ 841
+ 842
+ 843
+ 844
+ 845
+ 846
+ 847
+ 848
+ 849
+ 850
+ 851
+ 852
+ 853
+ 854
+ 855
+ 856
+ 857
+ 858
+ 859
+ 860
+ 861
+ 862
+ 863
+ 864
+ 865
+ 866
+ 867
+ 868
+ 869
+ 870
+ 871
+ 872
+ 873
+ 874
+ 875
+ 876
+ 877
+ 878
+ 879
+ 880
+ 881
+ 882
+ 883
+ 884
+ 885
+ 886
+ 887
+ 888
+ 889
+ 890
+ 891
+ 892
+ 893
+ 894
+ 895
+ 896
+ 897
+ 898
+ 899
+ 900
+ 901
+ 902
+ 903
+ 904
+ 905
+ 906
+ 907
+ 908
+ 909
+ 910
+ 911
+ 912
+ 913
+ 914
+ 915
+ 916
+ 917
+ 918
+ 919
+ 920
+ 921
+ 922
+ 923
+ 924
+ 925
+ 926
+ 927
+ 928
+ 929
+ 930
+ 931
+ 932
+ 933
+ 934
+ 935
+ 936
+ 937
+ 938
+ 939
+ 940
+ 941
+ 942
+ 943
+ 944
+ 945
+ 946
+ 947
+ 948
+ 949
+ 950
+ 951
+ 952
+ 953
+ 954
+ 955
+ 956
+ 957
+ 958
+ 959
+ 960
+ 961
+ 962
+ 963
+ 964
+ 965
+ 966
+ 967
+ 968
+ 969
+ 970
+ 971
+ 972
+ 973
+ 974
+ 975
+ 976
+ 977
+ 978
+ 979
+ 980
+ 981
+ 982
+ 983
+ 984
+ 985
+ 986
+ 987
+ 988
+ 989
+ 990
+ 991
+ 992
+ 993
+ 994
+ 995
+ 996
+ 997
+ 998
+ 999
+1000
+1001
+1002
+1003
+1004
+1005
+1006
+1007
+1008
+1009
+1010
+1011
+1012
+1013
+1014
+1015
+1016
+1017
+1018
+1019
+1020
+1021
+1022
+1023
+1024
+1025
+1026
+1027
+1028
+1029
+1030
+1031
+1032
+1033
+1034
+1035
+1036
+1037
+1038
+1039
+1040
+1041
+1042
+1043
+1044
+1045
+1046
+1047
+1048
+1049
+1050
+1051
+1052
+1053
+1054
+1055
+1056
+1057
+1058
+1059
+1060
+1061
+1062
+1063
+1064
+1065
+1066
+1067
+1068
+1069
+1070
+1071
+1072
+1073
+1074
+1075
+1076
+1077
+1078
+1079
+1080
+1081
+1082
+1083
+1084
+1085
+1086
+1087
+1088
+1089
+1090
+1091
+1092
+1093
+1094
+1095
+1096
+1097
+1098
+1099
+1100
+1101
+1102
+1103
+1104
+1105
+1106
+1107
+1108
+1109
+1110
+1111
+1112
+1113
+1114
+1115
+1116
+1117
+1118
+1119
+1120
+1121
+1122
+1123
+1124
+1125
+1126
+1127
+1128
+1129
+1130
+1131
+1132
+1133
+1134
+1135
+1136
+1137
+1138
+1139
+1140
+1141
+1142
+1143
+1144
+1145
+1146
+1147
+1148
+1149
+1150
+1151
+1152
+1153
+1154
+1155
+1156
+1157
+1158
+1159
+1160
+1161
+1162
+1163
+1164
+1165
+1166
+1167
+1168
+1169
+1170
+1171
+1172
+1173
+1174
+1175
+1176
+1177
+1178
+1179
+1180
+1181
+1182
+1183
+1184
+
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+/*!
+This crate provides a regular expression parser and an abstract syntax for
+regular expressions. The abstract syntax is defined by the `Expr` type. The
+concrete syntax is enumerated in the
+[`regex`](../regex/index.html#syntax)
+crate documentation.
+
+Note that since this crate is first and foremost an implementation detail for
+the `regex` crate, it may experience more frequent breaking changes. It is
+exposed as a separate crate so that others may use it to do analysis on regular
+expressions or even build their own matching engine.
+
+# Example: parsing an expression
+
+Parsing a regular expression can be done with the `Expr::parse` function.
+
+```rust
+use regex_syntax::Expr;
+
+assert_eq!(Expr::parse(r"ab|yz").unwrap(), Expr::Alternate(vec![
+    Expr::Literal { chars: vec!['a', 'b'], casei: false },
+    Expr::Literal { chars: vec!['y', 'z'], casei: false },
+]));
+```
+
+# Example: inspecting an error
+
+The parser in this crate provides very detailed error values. For example,
+if an invalid character class range is given:
+
+```rust
+use regex_syntax::{Expr, ErrorKind};
+
+let err = Expr::parse(r"[z-a]").unwrap_err();
+assert_eq!(err.position(), 4);
+assert_eq!(err.kind(), &ErrorKind::InvalidClassRange {
+    start: 'z',
+    end: 'a',
+});
+```
+
+Or unbalanced parentheses:
+
+```rust
+use regex_syntax::{Expr, ErrorKind};
+
+let err = Expr::parse(r"ab(cd").unwrap_err();
+assert_eq!(err.position(), 2);
+assert_eq!(err.kind(), &ErrorKind::UnclosedParen);
+```
+*/
+
+#![deny(missing_docs)]
+
+#[cfg(test)] extern crate quickcheck;
+#[cfg(test)] extern crate rand;
+
+mod parser;
+mod unicode;
+
+use std::char;
+use std::cmp::{Ordering, max, min};
+use std::fmt;
+use std::iter::IntoIterator;
+use std::ops::Deref;
+use std::slice;
+use std::vec;
+
+use unicode::case_folding;
+
+use self::Expr::*;
+use self::Repeater::*;
+
+pub use parser::is_punct;
+
+/// A regular expression abstract syntax tree.
+///
+/// An `Expr` represents the abstract syntax of a regular expression.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum Expr {
+    /// An empty regex (which never matches any text).
+    Empty,
+    /// A sequence of one or more literal characters to be matched.
+    Literal {
+        /// The characters.
+        chars: Vec<char>,
+        /// Whether to match case insensitively.
+        casei: bool,
+    },
+    /// Match any character, excluding new line.
+    AnyChar,
+    /// Match any character.
+    AnyCharNoNL,
+    /// A character class.
+    Class(CharClass),
+    /// Match the start of a line or beginning of input.
+    StartLine,
+    /// Match the end of a line or end of input.
+    EndLine,
+    /// Match the beginning of input.
+    StartText,
+    /// Match the end of input.
+    EndText,
+    /// Match a word boundary (word character on one side and a non-word
+    /// character on the other).
+    WordBoundary,
+    /// Match a position that is not a word boundary (word or non-word
+    /// characters on both sides).
+    NotWordBoundary,
+    /// A group, possibly non-capturing.
+    Group {
+        /// The expression inside the group.
+        e: Box<Expr>,
+        /// The capture index (starting at `1`) only for capturing groups.
+        i: Option<usize>,
+        /// The capture name, only for capturing named groups.
+        name: Option<String>,
+    },
+    /// A repeat operator (`?`, `*`, `+` or `{m,n}`).
+    Repeat {
+        /// The expression to be repeated. Limited to literals, `.`, classes
+        /// or grouped expressions.
+        e: Box<Expr>,
+        /// The type of repeat operator used.
+        r: Repeater,
+        /// Whether the repeat is greedy (match the most) or not (match the
+        /// least).
+        greedy: bool,
+    },
+    /// A concatenation of expressions. Must be matched one after the other.
+    ///
+    /// N.B. A concat expression can only appear at the top-level or
+    /// immediately inside a group expression.
+    Concat(Vec<Expr>),
+    /// An alternation of expressions. Only one must match.
+    ///
+    /// N.B. An alternate expression can only appear at the top-level or
+    /// immediately inside a group expression.
+    Alternate(Vec<Expr>),
+}
+
+type CaptureIndex = Option<usize>;
+
+type CaptureName = Option<String>;
+
+/// The type of a repeat operator expression.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum Repeater {
+    /// Match zero or one (`?`).
+    ZeroOrOne,
+    /// Match zero or more (`*`).
+    ZeroOrMore,
+    /// Match one or more (`+`).
+    OneOrMore,
+    /// Match for at least `min` and at most `max` (`{m,n}`).
+    ///
+    /// When `max` is `None`, there is no upper bound on the number of matches.
+    Range {
+        /// Lower bound on the number of matches.
+        min: u32,
+        /// Optional upper bound on the number of matches.
+        max: Option<u32>,
+    },
+}
+
+/// A character class.
+///
+/// A character class has a canonical format that the parser guarantees. Its
+/// canonical format is defined by the following invariants:
+///
+/// 1. Given any Unicode scalar value, it is matched by *at most* one character
+///    range in a canonical character class.
+/// 2. Every adjacent character range is separated by at least one Unicode
+///    scalar value.
+/// 3. Given any pair of character ranges `r1` and `r2`, if
+///    `r1.end < r2.start`, then `r1` comes before `r2` in a canonical
+///    character class.
+///
+/// In sum, any `CharClass` produced by this crate's parser is a sorted
+/// sequence of non-overlapping ranges. This makes it possible to test whether
+/// a character is matched by a class with a binary search.
+///
+/// Additionally, a character class may be marked *case insensitive*. If it's
+/// case insensitive, then:
+///
+/// 1. Simple case folding has been applied to all ranges.
+/// 2. Simple case folding must be applied to a character before testing
+///    whether it matches the character class.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct CharClass {
+    ranges: Vec<ClassRange>,
+    casei: bool,
+}
+
+/// A single inclusive range in a character class.
+///
+/// Since range boundaries are defined by Unicode scalar values, the boundaries
+/// can never be in the open interval `(0xD7FF, 0xE000)`. However, a range may
+/// *cover* codepoints that are not scalar values.
+///
+/// Note that this has a few convenient impls on `PartialEq` and `PartialOrd`
+/// for testing whether a character is contained inside a given range.
+#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Eq, Ord)]
+pub struct ClassRange {
+    /// The start character of the range.
+    ///
+    /// This must be less than or equal to `end`.
+    pub start: char,
+
+    /// The end character of the range.
+    ///
+    /// This must be greater than or equal to `end`.
+    pub end: char,
+}
+
+impl Expr {
+    /// Parses a string in a regular expression syntax tree.
+    pub fn parse(s: &str) -> Result<Expr> {
+        parser::Parser::parse(s).map(|e| e.simplify())
+    }
+
+    /// Returns true iff the expression can be repeated by a quantifier.
+    fn can_repeat(&self) -> bool {
+        match *self {
+            Literal{..}
+            | AnyChar
+            | AnyCharNoNL
+            | Class(_)
+            | StartLine | EndLine | StartText | EndText
+            | WordBoundary | NotWordBoundary
+            | Group{..}
+            => true,
+            _ => false,
+        }
+    }
+
+    fn simplify(self) -> Expr {
+        fn combine_literals(es: &mut Vec<Expr>, e: Expr) {
+            match (es.pop(), e) {
+                (None, e) => es.push(e),
+                (Some(Literal { chars: mut chars1, casei: casei1 }),
+                 Literal { chars: chars2, casei: casei2 }) => {
+                    if casei1 == casei2 {
+                        chars1.extend(chars2);
+                        es.push(Literal { chars: chars1, casei: casei1 });
+                    } else {
+                        es.push(Literal { chars: chars1, casei: casei1 });
+                        es.push(Literal { chars: chars2, casei: casei2 });
+                    }
+                }
+                (Some(e1), e2) => {
+                    es.push(e1);
+                    es.push(e2);
+                }
+            }
+        }
+        match self {
+            Repeat { e, r, greedy } => Repeat {
+                e: Box::new(e.simplify()),
+                r: r,
+                greedy: greedy,
+            },
+            Group { e, i, name } => {
+                let e = e.simplify();
+                if i.is_none() && name.is_none() && e.can_repeat() {
+                    e
+                } else {
+                    Group { e: Box::new(e), i: i, name: name }
+                }
+            }
+            Concat(es) => {
+                let mut new_es = Vec::with_capacity(es.len());
+                for e in es {
+                    combine_literals(&mut new_es, e.simplify());
+                }
+                if new_es.len() == 1 {
+                    new_es.pop().unwrap()
+                } else {
+                    Concat(new_es)
+                }
+            }
+            Alternate(es) => Alternate(es.into_iter()
+                                         .map(|e| e.simplify())
+                                         .collect()),
+            e => e,
+        }
+    }
+}
+
+impl Deref for CharClass {
+    type Target = Vec<ClassRange>;
+    fn deref(&self) -> &Vec<ClassRange> { &self.ranges }
+}
+
+impl IntoIterator for CharClass {
+    type Item = ClassRange;
+    type IntoIter = vec::IntoIter<ClassRange>;
+    fn into_iter(self) -> vec::IntoIter<ClassRange> { self.ranges.into_iter() }
+}
+
+impl<'a> IntoIterator for &'a CharClass {
+    type Item = &'a ClassRange;
+    type IntoIter = slice::Iter<'a, ClassRange>;
+    fn into_iter(self) -> slice::Iter<'a, ClassRange> { self.iter() }
+}
+
+impl CharClass {
+    /// Create a new class from an existing set of ranges.
+    fn new(ranges: Vec<ClassRange>) -> CharClass {
+        CharClass { ranges: ranges, casei: false }
+    }
+
+    /// Create an empty class.
+    fn empty() -> CharClass {
+        CharClass::new(Vec::new())
+    }
+
+    /// Returns true if `c` is matched by this character class.
+    ///
+    /// If this character class is case insensitive, then simple case folding
+    /// is applied to `c` before checking for a match.
+    pub fn matches(&self, mut c: char) -> bool {
+        if self.is_case_insensitive() {
+            c = simple_case_fold(c)
+        }
+        self.binary_search_by(|range| c.partial_cmp(range).unwrap()).is_ok()
+    }
+
+    /// Returns true if this character class should be matched case
+    /// insensitively.
+    ///
+    /// When `true`, simple case folding has already been applied to the
+    /// class.
+    pub fn is_case_insensitive(&self) -> bool {
+        self.casei
+    }
+
+    /// Create a new empty class from this one.
+    ///
+    /// Namely, its capacity and case insensitive setting will be the same.
+    fn to_empty(&self) -> CharClass {
+        CharClass { ranges: Vec::with_capacity(self.len()), casei: self.casei }
+    }
+
+    /// Merge two classes and canonicalize them.
+    #[cfg(test)]
+    fn merge(mut self, other: CharClass) -> CharClass {
+        self.ranges.extend(other);
+        self.canonicalize()
+    }
+
+    /// Canonicalze any sequence of ranges.
+    ///
+    /// This is responsible for enforcing the canonical format invariants
+    /// as described on the docs for the `CharClass` type.
+    fn canonicalize(mut self) -> CharClass {
+        // TODO: Save some cycles here by checking if already canonicalized.
+        self.ranges.sort();
+        let mut ordered = self.to_empty(); // TODO: Do this in place?
+        for candidate in self {
+            // If the candidate overlaps with an existing range, then it must
+            // be the most recent range added because we process the candidates
+            // in order.
+            if let Some(or) = ordered.ranges.last_mut() {
+                if or.overlapping(candidate) {
+                    *or = or.merge(candidate);
+                    continue;
+                }
+            }
+            ordered.ranges.push(candidate);
+        }
+        ordered
+    }
+
+    /// Negates the character class.
+    ///
+    /// For all `c` where `c` is a Unicode scalar value, `c` matches `self`
+    /// if and only if `c` does not match `self.negate()`.
+    ///
+    /// Note that this cannot be called on a character class that has had
+    /// case folding applied to it. (Because case folding turns on a flag
+    /// and doesn't store every possible matching character. Therefore,
+    /// its negation is tricky to get right. Turns out, we don't need it
+    /// anyway!)
+    fn negate(mut self) -> CharClass {
+        fn range(s: char, e: char) -> ClassRange { ClassRange::new(s, e) }
+
+        // Never allow negating of a class that has been case folded!
+        assert!(!self.casei);
+
+        if self.is_empty() { return self; }
+        self = self.canonicalize();
+        let mut inv = self.to_empty();
+        if self[0].start > '\x00' {
+            inv.ranges.push(range('\x00', dec_char(self[0].start)));
+        }
+        for win in self.windows(2) {
+            inv.ranges.push(range(inc_char(win[0].end),
+                                  dec_char(win[1].start)));
+        }
+        if self[self.len() - 1].end < char::MAX {
+            inv.ranges.push(range(inc_char(self[self.len() - 1].end),
+                                  char::MAX));
+        }
+        inv
+    }
+
+    /// Apply case folding to this character class.
+    ///
+    /// One a class had been case folded, it cannot be negated.
+    fn case_fold(self) -> CharClass {
+        let mut folded = self.to_empty();
+        folded.casei = true;
+        for r in self {
+            // Applying case folding to a range is expensive because *every*
+            // character needed to be examined. Thus, we avoid that drudgery
+            // if no character in the current range is in our case folding
+            // table.
+            if r.needs_case_folding() {
+                folded.ranges.extend(r.case_fold());
+            } else {
+                folded.ranges.push(r);
+            }
+        }
+        folded.canonicalize()
+    }
+}
+
+impl ClassRange {
+    /// Create a new class range.
+    ///
+    /// If `end < start`, then the two values are swapped so that
+    /// the invariant `start <= end` is preserved.
+    fn new(start: char, end: char) -> ClassRange {
+        if start <= end {
+            ClassRange { start: start, end: end }
+        } else {
+            ClassRange { start: end, end: start }
+        }
+    }
+
+    /// Create a range of one character.
+    fn one(c: char) -> ClassRange {
+        ClassRange { start: c, end: c }
+    }
+
+    /// Returns true if and only if the two ranges are overlapping. Note that
+    /// since ranges are inclusive, `a-c` and `d-f` are overlapping!
+    fn overlapping(self, other: ClassRange) -> bool {
+        max(self.start, other.start) <= inc_char(min(self.end, other.end))
+    }
+
+    /// Creates a new range representing the union of `self` and `other.
+    fn merge(self, other: ClassRange) -> ClassRange {
+        ClassRange {
+            start: min(self.start, other.start),
+            end: max(self.end, other.end),
+        }
+    }
+
+    /// Returns true if and only if this range contains a character that is
+    /// in the case folding table.
+    fn needs_case_folding(self) -> bool {
+        case_folding::C_plus_S_table
+        .binary_search_by(|&(c, _)| self.partial_cmp(&c).unwrap()).is_ok()
+    }
+
+    /// Apply case folding to this range.
+    ///
+    /// Since case folding might add characters such that the range is no
+    /// longer contiguous, this returns multiple class ranges. They are in
+    /// canonical order.
+    fn case_fold(self) -> Vec<ClassRange> {
+        let (s, e) = (self.start as u32, self.end as u32 + 1);
+        let mut start = simple_case_fold(self.start);
+        let mut end = start;
+        let mut next_case_fold = self.start;
+        let mut ranges = Vec::with_capacity(100);
+        for mut c in (s+1..e).filter_map(char::from_u32) {
+            if c >= next_case_fold {
+                c = match simple_case_fold_result(c) {
+                    Ok(i) => case_folding::C_plus_S_table[i].1,
+                    Err(i) => {
+                        if i < case_folding::C_plus_S_table.len() {
+                            next_case_fold = case_folding::C_plus_S_table[i].0;
+                        } else {
+                            next_case_fold = '\u{10FFFF}'
+                        }
+                        c
+                    }
+                };
+            }
+            if c != inc_char(end) {
+                ranges.push(ClassRange::new(start, end));
+                start = c;
+            }
+            end = c;
+        }
+        ranges.push(ClassRange::new(start, end));
+        ranges
+    }
+}
+
+impl PartialEq<char> for ClassRange {
+    #[inline]
+    fn eq(&self, other: &char) -> bool {
+        self.start <= *other && *other <= self.end
+    }
+}
+
+impl PartialEq<ClassRange> for char {
+    #[inline]
+    fn eq(&self, other: &ClassRange) -> bool {
+        other.eq(self)
+    }
+}
+
+impl PartialOrd<char> for ClassRange {
+    #[inline]
+    fn partial_cmp(&self, other: &char) -> Option<Ordering> {
+        Some(if self == other {
+            Ordering::Equal
+        } else if *other > self.end {
+            Ordering::Greater
+        } else {
+            Ordering::Less
+        })
+    }
+}
+
+impl PartialOrd<ClassRange> for char {
+    #[inline]
+    fn partial_cmp(&self, other: &ClassRange) -> Option<Ordering> {
+        other.partial_cmp(self).map(|o| o.reverse())
+    }
+}
+
+/// This implementation of `Display` will write a regular expression from the
+/// syntax tree. It does not write the original string parsed.
+impl fmt::Display for Expr {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            Empty => write!(f, ""),
+            Literal { ref chars, casei } => {
+                if casei { try!(write!(f, "(?i:")); }
+                for &c in chars {
+                    try!(write!(f, "{}", quote_char(c)));
+                }
+                if casei { try!(write!(f, ")")); }
+                Ok(())
+            }
+            AnyChar => write!(f, "(?s:.)"),
+            AnyCharNoNL => write!(f, "."),
+            Class(ref cls) => write!(f, "{}", cls),
+            StartLine => write!(f, "(?m:^)"),
+            EndLine => write!(f, "(?m:$)"),
+            StartText => write!(f, r"^"),
+            EndText => write!(f, r"$"),
+            WordBoundary => write!(f, r"\b"),
+            NotWordBoundary => write!(f, r"\B"),
+            Group { ref e, i: None, name: None } => write!(f, "(?:{})", e),
+            Group { ref e, name: None, .. } => write!(f, "({})", e),
+            Group { ref e, name: Some(ref n), .. } => {
+                write!(f, "(?P<{}>{})", n, e)
+            }
+            Repeat { ref e, r, greedy } => {
+                match &**e {
+                    &Literal { ref chars, .. } if chars.len() > 1 => {
+                        try!(write!(f, "(?:{}){}", e, r))
+                    }
+                    _ => try!(write!(f, "{}{}", e, r)),
+                }
+                if !greedy { try!(write!(f, "?")); }
+                Ok(())
+            }
+            Concat(ref es) => {
+                for e in es {
+                    try!(write!(f, "{}", e));
+                }
+                Ok(())
+            }
+            Alternate(ref es) => {
+                for (i, e) in es.iter().enumerate() {
+                    if i > 0 { try!(write!(f, "|")); }
+                    try!(write!(f, "{}", e));
+                }
+                Ok(())
+            }
+        }
+    }
+}
+
+impl fmt::Display for Repeater {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            ZeroOrOne => write!(f, "?"),
+            ZeroOrMore => write!(f, "*"),
+            OneOrMore => write!(f, "+"),
+            Range { min: s, max: None } => write!(f, "{{{},}}", s),
+            Range { min: s, max: Some(e) } if s == e => write!(f, "{{{}}}", s),
+            Range { min: s, max: Some(e) } => write!(f, "{{{}, {}}}", s, e),
+        }
+    }
+}
+
+impl fmt::Display for CharClass {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        if self.casei {
+            try!(write!(f, "(?i:"));
+        }
+        try!(write!(f, "["));
+        for range in self.iter() {
+            try!(write!(f, "{}", range));
+        }
+        try!(write!(f, "]"));
+        if self.casei {
+            try!(write!(f, ")"));
+        }
+        Ok(())
+    }
+}
+
+impl fmt::Display for ClassRange {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}-{}", quote_char(self.start), quote_char(self.end))
+    }
+}
+
+/// An alias for computations that can return a `Error`.
+pub type Result<T> = ::std::result::Result<T, Error>;
+
+/// A parse error.
+///
+/// This includes details about the specific type of error and a rough
+/// approximation of where it occurred.
+#[derive(Clone, Debug, PartialEq)]
+pub struct Error {
+    pos: usize,
+    surround: String,
+    kind: ErrorKind,
+}
+
+/// The specific type of parse error that can occur.
+#[derive(Clone, Debug, PartialEq)]
+pub enum ErrorKind {
+    /// A negation symbol is used twice in flag settings.
+    /// e.g., `(?-i-s)`.
+    DoubleFlagNegation,
+    /// The same capture name was used more than once.
+    /// e.g., `(?P<a>.)(?P<a>.)`.
+    DuplicateCaptureName(String),
+    /// An alternate is empty. e.g., `(|a)`.
+    EmptyAlternate,
+    /// A capture group name is empty. e.g., `(?P<>a)`.
+    EmptyCaptureName,
+    /// A negation symbol was not proceded by any flags. e.g., `(?i-)`.
+    EmptyFlagNegation,
+    /// A group is empty. e.g., `()`.
+    EmptyGroup,
+    /// An invalid number was used in a counted repetition. e.g., `a{b}`.
+    InvalidBase10(String),
+    /// An invalid hexadecimal number was used in an escape sequence.
+    /// e.g., `\xAG`.
+    InvalidBase16(String),
+    /// An invalid capture name was used. e.g., `(?P<0a>b)`.
+    InvalidCaptureName(String),
+    /// An invalid class range was givien. Specifically, when the start of the
+    /// range is greater than the end. e.g., `[z-a]`.
+    InvalidClassRange {
+        /// The first character specified in the range.
+        start: char,
+        /// The second character specified in the range.
+        end: char,
+    },
+    /// An escape sequence was used in a character class where it is not
+    /// allowed. e.g., `[a-\pN]` or `[\A]`.
+    InvalidClassEscape(Expr),
+    /// An invalid counted repetition min/max was given. e.g., `a{2,1}`.
+    InvalidRepeatRange {
+        /// The first number specified in the repetition.
+        min: u32,
+        /// The second number specified in the repetition.
+        max: u32,
+    },
+    /// An invalid Unicode scalar value was used in a long hexadecimal
+    /// sequence. e.g., `\x{D800}`.
+    InvalidScalarValue(u32),
+    /// An empty counted repetition operator. e.g., `a{}`.
+    MissingBase10,
+    /// A repetition operator was not applied to an expression. e.g., `*`.
+    RepeaterExpectsExpr,
+    /// A repetition operator was applied to an expression that cannot be
+    /// repeated. e.g., `a+*` or `a|*`.
+    RepeaterUnexpectedExpr(Expr),
+    /// A capture group name that is never closed. e.g., `(?P<a`.
+    UnclosedCaptureName(String),
+    /// An unclosed hexadecimal literal. e.g., `\x{a`.
+    UnclosedHex,
+    /// An unclosed parenthesis. e.g., `(a`.
+    UnclosedParen,
+    /// An unclosed counted repetition operator. e.g., `a{2`.
+    UnclosedRepeat,
+    /// An unclosed named Unicode class. e.g., `\p{Yi`.
+    UnclosedUnicodeName,
+    /// Saw end of regex before class was closed. e.g., `[a`.
+    UnexpectedClassEof,
+    /// Saw end of regex before escape sequence was closed. e.g., `\`.
+    UnexpectedEscapeEof,
+    /// Saw end of regex before flags were closed. e.g., `(?i`.
+    UnexpectedFlagEof,
+    /// Saw end of regex before two hexadecimal digits were seen. e.g., `\xA`.
+    UnexpectedTwoDigitHexEof,
+    /// Unopened parenthesis. e.g., `)`.
+    UnopenedParen,
+    /// Unrecognized escape sequence. e.g., `\q`.
+    UnrecognizedEscape(char),
+    /// Unrecognized flag. e.g., `(?a)`.
+    UnrecognizedFlag(char),
+    /// Unrecognized named Unicode class. e.g., `\p{Foo}`.
+    UnrecognizedUnicodeClass(String),
+    /// Hints that destructuring should not be exhaustive.
+    ///
+    /// This enum may grow additional variants, so this makes sure clients
+    /// don't count on exhaustive matching. (Otherwise, adding a new variant
+    /// could break existing code.)
+    #[doc(hidden)]
+    __Nonexhaustive,
+}
+
+impl Error {
+    /// Returns an approximate *character* offset at which the error occurred.
+    ///
+    /// The character offset may be equal to the number of characters in the
+    /// string, in which case it should be interpreted as pointing to the end
+    /// of the regex.
+    pub fn position(&self) -> usize {
+        self.pos
+    }
+
+    /// Returns the type of the regex parse error.
+    pub fn kind(&self) -> &ErrorKind {
+        &self.kind
+    }
+}
+
+impl ErrorKind {
+    fn description(&self) -> &str {
+        use ErrorKind::*;
+        match *self {
+            DoubleFlagNegation => "double flag negation",
+            DuplicateCaptureName(_) => "duplicate capture name",
+            EmptyAlternate => "empty alternate",
+            EmptyCaptureName => "empty capture name",
+            EmptyFlagNegation => "flag negation without any flags",
+            EmptyGroup => "empty group (e.g., '()')",
+            InvalidBase10(_) => "invalid base 10 number",
+            InvalidBase16(_) => "invalid base 16 number",
+            InvalidCaptureName(_) => "invalid capture name",
+            InvalidClassRange{..} => "invalid character class range",
+            InvalidClassEscape(_) => "invalid escape sequence in class",
+            InvalidRepeatRange{..} => "invalid counted repetition range",
+            InvalidScalarValue(_) => "invalid Unicode scalar value",
+            MissingBase10 => "missing count in repetition operator",
+            RepeaterExpectsExpr => "repetition operator missing expression",
+            RepeaterUnexpectedExpr(_) => "expression cannot be repeated",
+            UnclosedCaptureName(_) => "unclosed capture group name",
+            UnclosedHex => "unclosed hexadecimal literal",
+            UnclosedParen => "unclosed parenthesis",
+            UnclosedRepeat => "unclosed counted repetition operator",
+            UnclosedUnicodeName => "unclosed Unicode class literal",
+            UnexpectedClassEof => "unexpected EOF in character class",
+            UnexpectedEscapeEof => "unexpected EOF in escape sequence",
+            UnexpectedFlagEof => "unexpected EOF in flags",
+            UnexpectedTwoDigitHexEof => "unexpected EOF in hex literal",
+            UnopenedParen => "unopened parenthesis",
+            UnrecognizedEscape(_) => "unrecognized escape sequence",
+            UnrecognizedFlag(_) => "unrecognized flag",
+            UnrecognizedUnicodeClass(_) => "unrecognized Unicode class name",
+            __Nonexhaustive => unreachable!(),
+        }
+    }
+}
+
+impl ::std::error::Error for Error {
+    fn description(&self) -> &str {
+        self.kind.description()
+    }
+}
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "Error parsing regex near '{}' at character offset {}: {}",
+               self.surround, self.pos, self.kind)
+    }
+}
+
+impl fmt::Display for ErrorKind {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        use ErrorKind::*;
+        match *self {
+            DoubleFlagNegation =>
+                write!(f, "Only one negation symbol is allowed in flags."),
+            DuplicateCaptureName(ref s) =>
+                write!(f, "Capture name '{}' is used more than once.", s),
+            EmptyAlternate =>
+                write!(f, "Alternations cannot be empty."),
+            EmptyCaptureName =>
+                write!(f, "Capture names cannot be empty."),
+            EmptyFlagNegation =>
+                write!(f, "Flag negation requires setting at least one flag."),
+            EmptyGroup =>
+                write!(f, "Empty regex groups (e.g., '()') are not allowed."),
+            InvalidBase10(ref s) =>
+                write!(f, "Not a valid base 10 number: '{}'", s),
+            InvalidBase16(ref s) =>
+                write!(f, "Not a valid base 16 number: '{}'", s),
+            InvalidCaptureName(ref s) =>
+                write!(f, "Invalid capture name: '{}'. Capture names must \
+                           consist of [_a-zA-Z0-9] and are not allowed to \
+                           start with with a number.", s),
+            InvalidClassRange { start, end } =>
+                write!(f, "Invalid character class range '{}-{}'. \
+                           Character class ranges must start with the smaller \
+                           character, but {} > {}", start, end, start, end),
+            InvalidClassEscape(ref e) =>
+                write!(f, "Invalid escape sequence in character \
+                           class: '{}'.", e),
+            InvalidRepeatRange { min, max } =>
+                write!(f, "Invalid counted repetition range: {{{}, {}}}. \
+                           Counted repetition ranges must start with the \
+                           minimum, but {} > {}", min, max, min, max),
+            InvalidScalarValue(c) =>
+                write!(f, "Number does not correspond to a Unicode scalar \
+                           value: '{}'.", c),
+            MissingBase10 =>
+                write!(f, "Missing maximum in counted reptition operator."),
+            RepeaterExpectsExpr =>
+                write!(f, "Missing expression for reptition operator."),
+            RepeaterUnexpectedExpr(ref e) =>
+                write!(f, "Invalid application of reptition operator to: \
+                          '{}'.", e),
+            UnclosedCaptureName(ref s) =>
+                write!(f, "Capture name group for '{}' is not closed. \
+                           (Missing a '>'.)", s),
+            UnclosedHex =>
+                write!(f, "Unclosed hexadecimal literal (missing a '}}')."),
+            UnclosedParen =>
+                write!(f, "Unclosed parenthesis."),
+            UnclosedRepeat =>
+                write!(f, "Unclosed counted repetition (missing a '}}')."),
+            UnclosedUnicodeName =>
+                write!(f, "Unclosed Unicode literal (missing a '}}')."),
+            UnexpectedClassEof =>
+                write!(f, "Character class was not closed before the end of \
+                           the regex (missing a ']')."),
+            UnexpectedEscapeEof =>
+                write!(f, "Started an escape sequence that didn't finish \
+                           before the end of the regex."),
+            UnexpectedFlagEof =>
+                write!(f, "Inline flag settings was not closed before the end \
+                           of the regex (missing a ')' or ':')."),
+            UnexpectedTwoDigitHexEof =>
+                write!(f, "Unexpected end of two digit hexadecimal literal."),
+            UnopenedParen =>
+                write!(f, "Unopened parenthesis."),
+            UnrecognizedEscape(c) =>
+                write!(f, "Unrecognized escape sequence: '\\{}'.", c),
+            UnrecognizedFlag(c) =>
+                write!(f, "Unrecognized flag: '{}'. \
+                           (Allowed flags: i, s, m, U, x.)", c),
+            UnrecognizedUnicodeClass(ref s) =>
+                write!(f, "Unrecognized Unicode class name: '{}'.", s),
+            __Nonexhaustive => unreachable!(),
+        }
+    }
+}
+
+/// Returns the Unicode *simple* case folding of `c`.
+///
+/// N.B. This is hidden because it really isn't the responsibility of this
+/// crate to do simple case folding. One hopes that either another crate or
+/// the standard library will be able to do this for us. In any case, we still
+/// expose it because it is used inside the various Regex engines.
+#[doc(hidden)]
+pub fn simple_case_fold(c: char) -> char {
+    simple_case_fold_result(c)
+        .map(|i| case_folding::C_plus_S_table[i].1)
+        .unwrap_or(c)
+}
+
+/// The result of binary search on the simple case folding table.
+///
+/// This level of detail is exposed so that we can do case folding on a
+/// range of characters efficiently.
+fn simple_case_fold_result(c: char) -> ::std::result::Result<usize, usize> {
+    case_folding::C_plus_S_table.binary_search_by(|&(x, _)| x.cmp(&c))
+}
+
+/// Escapes all regular expression meta characters in `text`.
+///
+/// The string returned may be safely used as a literal in a regular
+/// expression.
+pub fn quote(text: &str) -> String {
+    let mut quoted = String::with_capacity(text.len());
+    for c in text.chars() {
+        if parser::is_punct(c) {
+            quoted.push('\\');
+        }
+        quoted.push(c);
+    }
+    quoted
+}
+
+fn quote_char(c: char) -> String {
+    let mut s = String::new();
+    if parser::is_punct(c) {
+        s.push('\\');
+    }
+    s.push(c);
+    s
+}
+
+fn inc_char(c: char) -> char {
+    match c {
+        char::MAX => char::MAX,
+        '\u{D7FF}' => '\u{E000}',
+        c => char::from_u32(c as u32 + 1).unwrap(),
+    }
+}
+
+fn dec_char(c: char) -> char {
+    match c {
+        '\x00' => '\x00',
+        '\u{E000}' => '\u{D7FF}',
+        c => char::from_u32(c as u32 - 1).unwrap(),
+    }
+}
+
+/// Returns true if and only if `c` is a word character.
+#[doc(hidden)]
+pub fn is_word_char(c: char) -> bool {
+    match c {
+        '_' | '0' ... '9' | 'a' ... 'z' | 'A' ... 'Z'  => true,
+        _ => ::unicode::regex::PERLW.binary_search_by(|&(start, end)| {
+            if c >= start && c <= end {
+                Ordering::Equal
+            } else if start > c {
+                Ordering::Greater
+            } else {
+                Ordering::Less
+            }
+        }).is_ok(),
+    }
+}
+
+#[cfg(test)]
+mod properties;
+
+#[cfg(test)]
+mod tests {
+    use {CharClass, ClassRange};
+
+    fn class(ranges: &[(char, char)]) -> CharClass {
+        let ranges = ranges.iter().cloned()
+                           .map(|(c1, c2)| ClassRange::new(c1, c2)).collect();
+        CharClass::new(ranges)
+    }
+
+    fn classi(ranges: &[(char, char)]) -> CharClass {
+        let mut cls = class(ranges);
+        cls.casei = true;
+        cls
+    }
+
+    #[test]
+    fn class_canon_no_change() {
+        let cls = class(&[('a', 'c'), ('x', 'z')]);
+        assert_eq!(cls.clone().canonicalize(), cls);
+    }
+
+    #[test]
+    fn class_canon_unordered() {
+        let cls = class(&[('x', 'z'), ('a', 'c')]);
+        assert_eq!(cls.canonicalize(), class(&[
+            ('a', 'c'), ('x', 'z'),
+        ]));
+    }
+
+    #[test]
+    fn class_canon_overlap() {
+        let cls = class(&[('x', 'z'), ('w', 'y')]);
+        assert_eq!(cls.canonicalize(), class(&[
+            ('w', 'z'),
+        ]));
+    }
+
+    #[test]
+    fn class_canon_overlap_many() {
+        let cls = class(&[
+            ('c', 'f'), ('a', 'g'), ('d', 'j'), ('a', 'c'),
+            ('m', 'p'), ('l', 's'),
+        ]);
+        assert_eq!(cls.clone().canonicalize(), class(&[
+            ('a', 'j'), ('l', 's'),
+        ]));
+    }
+
+    #[test]
+    fn class_canon_overlap_many_case_fold() {
+        let cls = class(&[
+            ('C', 'F'), ('A', 'G'), ('D', 'J'), ('A', 'C'),
+            ('M', 'P'), ('L', 'S'), ('c', 'f'),
+        ]);
+        assert_eq!(cls.case_fold(), classi(&[
+            ('a', 'j'), ('l', 's'),
+        ]));
+    }
+
+    #[test]
+    fn class_canon_overlap_boundary() {
+        let cls = class(&[('x', 'z'), ('u', 'w')]);
+        assert_eq!(cls.canonicalize(), class(&[
+            ('u', 'z'),
+        ]));
+    }
+
+    #[test]
+    fn class_canon_extreme_edge_case() {
+        let cls = class(&[('\x00', '\u{10FFFF}'), ('\x00', '\u{10FFFF}')]);
+        assert_eq!(cls.canonicalize(), class(&[
+            ('\x00', '\u{10FFFF}'),
+        ]));
+    }
+
+    #[test]
+    fn class_canon_singles() {
+        let cls = class(&[('a', 'a'), ('b', 'b')]);
+        assert_eq!(cls.canonicalize(), class(&[('a', 'b')]));
+    }
+
+    #[test]
+    fn class_negate_single() {
+        let cls = class(&[('a', 'a')]);
+        assert_eq!(cls.negate(), class(&[
+            ('\x00', '\x60'), ('\x62', '\u{10FFFF}'),
+        ]));
+    }
+
+    #[test]
+    fn class_negate_singles() {
+        let cls = class(&[('a', 'a'), ('b', 'b')]);
+        assert_eq!(cls.negate(), class(&[
+            ('\x00', '\x60'), ('\x63', '\u{10FFFF}'),
+        ]));
+    }
+
+    #[test]
+    fn class_negate_multiples() {
+        let cls = class(&[('a', 'c'), ('x', 'z')]);
+        assert_eq!(cls.negate(), class(&[
+            ('\x00', '\x60'), ('\x64', '\x77'), ('\x7b', '\u{10FFFF}'),
+        ]));
+    }
+
+    #[test]
+    fn class_negate_min_scalar() {
+        let cls = class(&[('\x00', 'a')]);
+        assert_eq!(cls.negate(), class(&[
+            ('\x62', '\u{10FFFF}'),
+        ]));
+    }
+
+    #[test]
+    fn class_negate_max_scalar() {
+        let cls = class(&[('a', '\u{10FFFF}')]);
+        assert_eq!(cls.negate(), class(&[
+            ('\x00', '\x60'),
+        ]));
+    }
+
+    #[test]
+    fn class_negate_everything() {
+        let cls = class(&[('\x00', '\u{10FFFF}')]);
+        assert_eq!(cls.negate(), class(&[]));
+    }
+
+    #[test]
+    fn class_negate_everything_sans_one() {
+        let cls = class(&[
+            ('\x00', '\u{10FFFD}'), ('\u{10FFFF}', '\u{10FFFF}')
+        ]);
+        assert_eq!(cls.negate(), class(&[
+            ('\u{10FFFE}', '\u{10FFFE}'),
+        ]));
+    }
+
+    #[test]
+    fn class_negate_surrogates_min() {
+        let cls = class(&[('\x00', '\u{D7FF}')]);
+        assert_eq!(cls.negate(), class(&[
+            ('\u{E000}', '\u{10FFFF}'),
+        ]));
+    }
+
+    #[test]
+    fn class_negate_surrogates_min_edge() {
+        let cls = class(&[('\x00', '\u{D7FE}')]);
+        assert_eq!(cls.negate(), class(&[
+            ('\u{D7FF}', '\u{10FFFF}'),
+        ]));
+    }
+
+    #[test]
+    fn class_negate_surrogates_max() {
+        let cls = class(&[('\u{E000}', '\u{10FFFF}')]);
+        assert_eq!(cls.negate(), class(&[
+            ('\x00', '\u{D7FF}'),
+        ]));
+    }
+
+    #[test]
+    fn class_negate_surrogates_max_edge() {
+        let cls = class(&[('\u{E001}', '\u{10FFFF}')]);
+        assert_eq!(cls.negate(), class(&[
+            ('\x00', '\u{E000}'),
+        ]));
+    }
+
+    #[test]
+    fn class_fold_retain_only_needed() {
+        let cls = class(&[('A', 'Z'), ('a', 'z')]);
+        assert_eq!(cls.case_fold(), classi(&[
+            ('a', 'z'),
+        ]));
+    }
+
+    #[test]
+    fn class_fold_az() {
+        let cls = class(&[('A', 'Z')]);
+        assert_eq!(cls.case_fold(), classi(&[
+            ('a', 'z'),
+        ]));
+    }
+
+    #[test]
+    fn class_fold_a_underscore() {
+        let cls = class(&[('A', 'A'), ('_', '_')]);
+        assert_eq!(cls.clone().canonicalize(), class(&[
+            ('A', 'A'), ('_', '_'),
+        ]));
+        assert_eq!(cls.case_fold(), classi(&[
+            ('_', '_'), ('a', 'a'),
+        ]));
+    }
+
+    #[test]
+    fn class_fold_a_equals() {
+        let cls = class(&[('A', 'A'), ('=', '=')]);
+        assert_eq!(cls.clone().canonicalize(), class(&[
+            ('=', '='), ('A', 'A'),
+        ]));
+        assert_eq!(cls.case_fold(), classi(&[
+            ('=', '='), ('a', 'a'),
+        ]));
+    }
+
+    #[test]
+    fn class_fold_no_folding_needed() {
+        let cls = class(&[('\x00', '\x10')]);
+        assert_eq!(cls.case_fold(), classi(&[
+            ('\x00', '\x10'),
+        ]));
+    }
+}
+
+
+ + + + + + + + + + + + + + + \ No newline at end of file -- cgit v1.2.3