summaryrefslogtreecommitdiff
path: root/TLD/generate.pl
blob: 3a964975362416eb5cf34af5afc21831179fac4f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/usr/bin/perl -w
# use strict;
use utf8;
use open ':utf8';
use Regexp::Assemble;
# Name of the public suffix list file read by generate(); the path is relative,
# so it is resolved against the current working directory.
our $dat = "public_suffix_list.dat";

# Stop immediately if the list is not present as a plain file.
-f $dat or die(".dat file $dat not found!");

# generate()
#
# Builds ./tld.js from ./tld_template.js using the rules in the file named by
# the file-level variable $dat.
#
# Each rule line of $dat is cut at the first whitespace (which also removes the
# LF or CRLF line terminator), its dots are regex-escaped and it is lowercased.
# Empty lines and lines starting with "//" are skipped.  Lines starting with
# "!" go (without the "!") into the exception list; all other lines go into the
# rule list, with the first "*." replaced by "[^\.]+\.".
#
# Both lists are combined into single patterns with Regexp::Assemble, and every
# occurrence of the placeholders %tld_rx% (rules) and %tld_ex% (exceptions) in
# the template is replaced by the corresponding pattern.  Each resulting line
# is written to ./tld.js and also echoed to STDOUT.
#
# Takes no arguments.  Returns 1 on success; dies if a file cannot be opened
# or if the output file cannot be closed cleanly.
sub generate {
  my $src = "./tld_template.js";
  my $dst = "./tld.js";
  my (@rx, @ex);

  # The ':utf8' layer is spelled out so the handles behave like the ones the
  # file-level "use open ':utf8'" pragma would produce.
  open(my $dat_fh, '<:utf8', $dat) or die("Cannot open $dat: $!");
  while (my $rule = <$dat_fh>) {
    # A rule ends at the first whitespace; this also strips "\n" / "\r\n".
    $rule =~ s/\s.*//s;
    next if $rule eq '' || $rule =~ m{^//};

    $rule =~ s/\./\\\./g;
    $rule = lc($rule);

    if ($rule =~ s/^!//) {
      push(@ex, $rule);
    } else {
      # Wildcard label: match exactly one label that contains no dot.
      $rule =~ s/\*\\\./[^\\.]+\\./;
      push(@rx, $rule);
    }
  }
  close($dat_fh);

  # Combine a list of patterns into one pattern string.
  my $assemble = sub {
    my $pattern = Regexp::Assemble->new->add(@_)->as_string();
    # Defensive: unwrap a "(?-xism:...)" qr-style wrapper if one is present.
    $pattern =~ s/\(\?-xism:(.*)\)/$1/;
    return $pattern;
  };

  my $rx = $assemble->(@rx);
  @rx = ();    # the source list is no longer needed
  my $ex = $assemble->(@ex);
  @ex = ();

  open(my $src_fh, '<:utf8', $src) or die("Cannot open $src: $!");
  open(my $dst_fh, '>:utf8', $dst) or die("Cannot open $dst: $!");
  while (my $line = <$src_fh>) {
    $line =~ s/%tld_rx%/$rx/g;
    $line =~ s/%tld_ex%/$ex/g;
    print {$dst_fh} $line;
    print $line;    # echo the generated line to STDOUT as well
  }
  close($src_fh);
  # Buffered write errors only surface at close, so check it.
  close($dst_fh) or die("Cannot write $dst: $!");
  return 1;
}
# Script entry point: run the generator once, with no arguments.
generate();