Switch to edit mode.
Name(s): link_check1: ":link_check(text, local_url [,known_good [,known_bad]])"
2: " => {good URLs, bad URLs}"
3: "Scans through the HTML text for any links or images that don't exist. Will actually go out on the web to check each link. 'Local_url' is the URL of where the text sits (so that it can deal with relative URLs)."
4: "Optional arguments are lists of known good and known bad URLs (used to speed up link checks across a bunch of files.)"
5: "Returns a list of URLs that worked, and a list that didn't."
6: "Relative links are resolved against local_url BEFORE they are compared with good, bad, known_good or known_bad, so all four lists are matched against (and the results hold) the resolved form."
7: "Only http:// links and relative links are actually fetched. mailto: links and other schemes are skipped and end up in neither result list unless they were already in a known list."
8: {text, local_url, ?known_good = {}, ?known_bad = {}} = args
9: tokens = $html_utils:html2token_suspended(text)
10: bad = good = {}
11: for token in (tokens)
12: "Yield at the top of the loop so that the `continue' statements below cannot skip it."
13: $command_utils:suspend_if_needed()
14: link = ""
15: if (index(token, "<IMG ") == 1)
16: "We've found an image."
17: if (!(start = index(token, "SRC=")))
18: "An image without a SRC? Hmm..."
19: continue
20: endif
21: link = token[start + 4..$]
22: elseif (index(token, "<A ") == 1)
23: "We've found an anchor."
24: if (!(start = index(token, "HREF=")))
25: "No HREF? Maybe it was a NAME anchor."
26: continue
27: endif
28: link = token[start + 5..$]
29: else
30: "This token is either a non-link tag, or plain text - go away."
31: endif
32: if (link)
33: "First, let's strip off the garbage: an optional leading quote, then everything from the first quote, '>' or space onward."
34: link && link[1] == "\"" && (link = link[2..$])
35: end = $math_utils:min_positive(index(link, "\""), index(link, ">"), index(link, " "))
36: if (!end)
37: "No terminator was found, so we can't tell where the URL stops."
38: continue
39: endif
40: link = link[1..end - 1]
41: link && link[$] == "\"" && (link = link[1..$ - 1])
42: if (!link)
43: continue
44: endif
45: "After all this pruning we now have what should be a url."
46: is_mail = index(link, "mailto:") == 1
47: is_relative = !is_mail && !index(link, "://")
48: if (is_relative)
49: "Must be a local, relative link. Resolve it now: good/bad store the resolved URL, so looking up the raw text would never match and the same link would be fetched and listed again."
50: link = $file_utils:relative_url(local_url, link)
51: endif
52: if (link in good || link in bad)
53: "We already have this one."
54: elseif (link in known_bad)
55: bad = setadd(bad, link)
56: elseif (link in known_good)
57: good = setadd(good, link)
58: elseif (is_mail)
59: "There is no good way to verify an email address."
60: elseif (is_relative || index(link, "http://") == 1)
61: "Absolute web link, or a relative link resolved above. An error raised by confirm_URL counts as a bad link."
62: if (`$network:confirm_URL(link) ! ANY')
63: good = setadd(good, link)
64: else
65: bad = setadd(bad, link)
66: endif
67: else
68: "Must be a gopher, ftp, or telnet link."
69: "Not supported (yet)."
70: endif
71: endif
72: endfor
73: return {good, bad}
74: "Last modified by Dax (#789) on Thu May 28 13:12:54 1998 EDT."