summaryrefslogtreecommitdiff
path: root/indexer/src/sys_freebsd1.rs
blob: f2e9e296d87abe7be58845d35eed1805096605b4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
use std::collections::HashSet;
use std::io::Result;
use regex::Regex;
use postgres;

use open;
use pkg;


// Synchronise a FreeBSD <= 9.2 package repository.
//
// Three kinds of directory listings are fetched, all relative to `mirror` (which is concatenated
// directly with the sub-paths and must therefore end in '/'):
// - "Latest/", whose entries (minus extension) serve as the list of known package names;
// - the mirror root, whose flagged entries other than "All" and "Latest" are the categories;
// - every category directory, whose entries are the package files to index.
//
// The hard part is cutting a file name such as 'foo-bar-1.2.3.tbz' into a name and a version.
// Two approaches are combined:
// 1. Look for the longest known package name that is followed by a '-' in the file name; the
//    remainder is the version.
// 2. If no known name matches, guess with splitver().
//
// Neither is reliable on its own: 'Latest' tends to miss a few packages, and version strings are
// too irregular for guessing. Even combined they can go wrong when only a shorter prefix is
// known, e.g. 'pear-PHPUnit-1.3.3.tbz' becomes 'pear' version 'PHPUnit-1.3.3' because 'Latest'
// has 'pear' but not 'pear-PHPUnit'. EXTRA_PKGS supplies such names by hand.
//
// Each (name, version) pair is handed to pkg::pkg() at most once, for the first category it
// shows up in. An error from any directory listing aborts the sync and is returned to the
// caller; whatever pkg::pkg() returns is discarded.
pub fn sync(pg: &postgres::GenericConnection, sys: i32, arch: &str, mirror: &str) -> Result<()> {
    // Known package names: everything in 'Latest' plus the hand-maintained extras.
    let latest_url = format!("{}Latest/", mirror);
    let latest = open::Path{path: &latest_url, cache: true, canbelocal: false}.dirlist()?;
    let mut pkgs: Vec<String> = Vec::new();
    pkgs.extend(latest.into_iter().map(|(n, _)| trimext(&n).to_string()));
    pkgs.extend(EXTRA_PKGS.iter().map(|e| e.to_string()));
    // Longest first, so that the first prefix hit below is also the longest one.
    pkgs.sort_by(|lhs, rhs| rhs.len().cmp(&lhs.len()));

    // (name, version) pairs already indexed. A package can be listed in several categories; only
    // its first occurrence is processed.
    let mut seenpkgs = HashSet::new();

    let toplevel = open::Path{path: mirror, cache: true, canbelocal: false}.dirlist()?;
    // NOTE(review): the boolean returned by dirlist() is assumed to mark directories -- confirm
    // against open::Path.
    for (cat, is_dir) in toplevel {
        if !is_dir || cat == "All" || cat == "Latest" {
            continue;
        }
        trace!("Category: {}", cat);

        let cat_url = format!("{}{}/", mirror, cat);
        let files = open::Path{path: &cat_url, cache: true, canbelocal: false}.dirlist()?;
        for (f, _) in files {
            let name = trimext(&f);
            // The byte-offset arithmetic below relies on the name being plain ASCII.
            if !name.is_ascii() {
                warn!("Non-ASCII package name: {}", f);
                continue;
            }

            // Strategy 1: a known name, followed by '-', followed by a non-empty version.
            let known = pkgs.iter()
                .map(|p| p.as_str())
                .find(|&p| name.len() > p.len() + 1
                    && name.starts_with(p)
                    && name.as_bytes()[p.len()] == b'-');
            // Strategy 2: fall back to guessing.
            let split = match known {
                Some(p) => Some((p, &name[p.len() + 1..])),
                None => splitver(name),
            };
            let (pkgname, ver) = match split {
                Some(parts) => parts,
                None => {
                    warn!("Unknown package: {}/{}", cat, f);
                    continue;
                }
            };

            // insert() returns false when the pair was already present.
            if !seenpkgs.insert((pkgname.to_string(), ver.to_string())) {
                continue;
            }

            let path = format!("{}{}/{}", mirror, cat, f);
            pkg::pkg(pg, pkg::PkgOpt{
                force: false,
                sys: sys,
                cat: &cat,
                pkg: pkgname,
                ver: ver,
                date: pkg::Date::Desc,
                arch: Some(arch),
                file: open::Path{
                    path: &path,
                    cache: false,
                    canbelocal: false,
                },
            });
        }
    }
    Ok(())
}


// Strip one trailing package archive extension (".tgz" or ".tbz") from a file name.
//
// At most a single extension is removed, so the result does not depend on the order in which
// the extensions are tried: "a.tbz.tgz" yields "a.tbz" and "a.tgz.tbz" yields "a.tgz". Names
// without a known extension, including the empty string, are returned unchanged.
fn trimext(n: &str) -> &str {
    for ext in [".tgz", ".tbz"].iter() {
        if n.ends_with(*ext) {
            // ends_with() guarantees the cut point is in range and on a character boundary.
            return &n[..n.len() - ext.len()];
        }
    }
    n
}


fn splitver(n: &str) -> Option<(&str, &str)> {
    lazy_static!(
        static ref RE1: Regex = Regex::new("^(.+?)-([0-9].*)$").unwrap();
        static ref RE2: Regex = Regex::new("^(.+)-([^-]+)$").unwrap();
    );
    if let Some(cap) = RE1.captures(n) {
        Some((cap.get(1).unwrap().as_str(), cap.get(2).unwrap().as_str()))
    } else if let Some(cap) = RE2.captures(n) {
        Some((cap.get(1).unwrap().as_str(), cap.get(2).unwrap().as_str()))
    } else {
        None
    }
}


// Package names that sync() adds to the names read from the 'Latest' directory.
//
// These are multi-component names whose shorter prefix would otherwise be mistaken for the
// package name (e.g. 'pear-PHPUnit' versus 'pear'). Each name is listed once and the list is
// kept in alphabetical order; sync() re-sorts by length, so the order here is only for
// readability.
//
// This list may not be complete, and these packages may not necessarily have man pages.
const EXTRA_PKGS: &[&str] = &[
    "amanda-client",
    "amanda-server",
    "apache-event",
    "apache-itk",
    "apache-peruser",
    "apache-tomcat",
    "apache-worker",
    "bison-devel",
    "boxbackup-devel",
    "ffmpeg-devel",
    "flex-sdk",
    "fpc-gdb",
    "freeradius-mysql",
    "gdb-insight",
    "glib-reference",
    "gmime-24",
    "gmime-24-sharp",
    "gtk-reference",
    "gtk-sharp",
    "gtkmm-reference",
    "horde-content",
    "horde-groupware",
    "horde-timeobjects",
    "horde-webmail",
    "hping-devel",
    "ja-jvim-direct_canna",
    "ja-mutt-devel",
    "kdelibs-experimental",
    "kdepim-runtime",
    "lame-devel",
    "libdivxdecore-devel",
    "libquicktime-lame",
    "libtorrent-rasterbar",
    "linux-netscape-communicator",
    "mkisofs-devel",
    "mldonkey-core-devel",
    "mldonkey-gui-devel",
    "mod_log_sql-dtc",
    "nethack-qt",
    "nfdump-devel",
    "openssl-beta",
    "pear-PHPUnit",
    "pear-XML_Query2XML",
    "pear-phpunit-PHPUnit",
    "pgadmin3-unicode",
    "proftpd-mod_ldap",
    "proftpd-mod_sql_mysql",
    "proftpd-mod_sql_odbc",
    "proftpd-mod_sql_postgres",
    "proftpd-mod_sql_sqlite",
    "proftpd-mod_sql_tds",
    "qt-static",
    "rsyslog-gnutls",
    "rsyslog-gssapi",
    "rsyslog-libdbi",
    "rsyslog-mysql",
    "rsyslog-pgsql",
    "rsyslog-relp",
    "rsyslog-rfc3195",
    "rsyslog-snmp",
    "samba-libsmbclient",
    "samba-nmblookup",
    "squirrelmail-shared_calendars-plugin",
    "tcl-thread",
    "wxgtk2-common-devel",
    "wxgtk2-contrib-common-devel",
    "wxgtk2-utils-devel",
];