Switch to edit mode.
Name(s): link_check
":link_check(text, local_url [,known_good [,known_bad]])";
" => {good URLs, bad URLs}";
"Scans the HTML in 'text' for <IMG SRC=...> and <A HREF=...> targets and goes out on the web to confirm each one with $network:confirm_URL.";
"'local_url' is the URL of where the text sits; relative links are resolved against it with $file_utils:relative_url before they are checked.";
"'known_good' and 'known_bad' are optional lists of URLs that were already verified elsewhere (used to speed up link checks across a bunch of files). A link found in either list is classified without a network check.";
"Returns a list of URLs that worked and a list that didn't; each URL appears at most once. mailto: links and non-http schemes (gopher, ftp, telnet, ...) are not checked and appear in neither list.";
{text, local_url, ?known_good = {}, ?known_bad = {}} = args;
tokens = $html_utils:html2token_suspended(text);
bad = good = {};
for token in (tokens)
  "Yield at the top of the loop so that tokens skipped with 'continue' below still give the task a chance to suspend.";
  $command_utils:suspend_if_needed();
  "Pick the attribute that carries the URL; anything that is not an image or an anchor is ignored.";
  if (index(token, "<IMG ") == 1)
    attr = "SRC=";
  elseif (index(token, "<A ") == 1)
    attr = "HREF=";
  else
    "This token is either a non-link tag, or plain text.";
    continue;
  endif
  if (!(start = index(token, attr)))
    "An image without a SRC, or an anchor without an HREF (maybe a NAME anchor).";
    continue;
  endif
  link = token[start + length(attr)..$];
  "Strip an opening quote. HTML allows both double and single quotes around attribute values.";
  quote = "\"";
  if (link && link[1] in {"\"", "'"})
    quote = link[1];
    link = link[2..$];
  endif
  "The URL ends at the closing quote, the end of the tag, or the next space, whichever comes first.";
  stop = $math_utils:min_positive(index(link, quote), index(link, ">"), index(link, " "));
  if (!stop)
    "No terminator found; the tag is malformed, so skip it.";
    continue;
  endif
  link = link[1..stop - 1];
  if (!link)
    continue;
  endif
  "After all this pruning we now have what should be a URL.";
  if (link in good || link in bad)
    "We already have this one.";
    continue;
  elseif (link in known_bad)
    bad = setadd(bad, link);
    continue;
  elseif (link in known_good)
    good = setadd(good, link);
    continue;
  elseif (index(link, "mailto:") == 1)
    "There is no good way to verify an email address.";
    continue;
  elseif (index(link, "http://") == 1)
    "Absolute web link; check it as is.";
  elseif (index(link, "://"))
    "Must be a gopher, ftp, or telnet link. Not supported (yet).";
    continue;
  else
    "Must be a local, relative link.";
    link = $file_utils:relative_url(local_url, link);
    "The result lists hold resolved URLs, so look the resolved form up again. Without this a repeated relative link would be fetched and recorded once per occurrence.";
    if (link in good || link in bad)
      continue;
    elseif (link in known_bad)
      bad = setadd(bad, link);
      continue;
    elseif (link in known_good)
      good = setadd(good, link);
      continue;
    endif
  endif
  "Any error raised by the check is caught and yields a false value, which counts as a bad link.";
  if (`$network:confirm_URL(link) ! ANY')
    good = setadd(good, link);
  else
    bad = setadd(bad, link);
  endif
endfor
return {good, bad};
"Last modified by Dax (#789) on Thu May 28 13:12:54 1998 EDT.";