diff options
Diffstat (limited to 'cmd/gen-posix/http_hacks.go')
-rw-r--r-- | cmd/gen-posix/http_hacks.go | 156 |
1 files changed, 156 insertions, 0 deletions
diff --git a/cmd/gen-posix/http_hacks.go b/cmd/gen-posix/http_hacks.go new file mode 100644 index 0000000..16b8a8d --- /dev/null +++ b/cmd/gen-posix/http_hacks.go @@ -0,0 +1,156 @@ +package main + +import ( + "bufio" + "bytes" + "errors" + "fmt" + "io" + "net/http" + "os" + "os/exec" + "strings" + + "git.lukeshu.com/www/lib/httpcache" +) + +func _checkURL(url string) (string, error) { + switch { + case strings.HasPrefix(url, "https://web.archive.org/"): + _, err := httpcache.Get(url, nil) + return url, err + case strings.HasPrefix(url, "https://www2.opengroup.org/ogsys/catalog/"): + _, err := httpcache.Get(url, nil) + if err == nil { + return url, nil + } + if !errors.Is(err, os.ErrNotExist) { // don't hide non-404 errors + return "", err + } + suffix := strings.TrimPrefix(url, "https://www2.opengroup.org/ogsys/catalog/") + url2 := "https://publications.opengroup.org/" + strings.ToLower(suffix) + _, err = httpcache.Get(url2, nil) + if err == nil { + return url2, nil + } + if !errors.Is(err, os.ErrNotExist) { // don't hide non-404 errors + return "", err + } + url3, err := _checkURL("https://web.archive.org/web/20170102/" + url) + if err == nil { + return url3, nil + } + return url+"#ERROR", nil + case url == "http://ieeexplore.ieee.org/servlet/opac?punumber=7394900": + return url+"#ERROR", nil + default: + _, err := httpcache.Get(url, nil) + if err != nil && errors.Is(err, os.ErrNotExist) { + return _checkURL("https://web.archive.org/web/20170102/" + url) + } + return url, err + } +} + +func checkURL(url string) string { + url2, err := _checkURL(url) + if err != nil { + panic(fmt.Errorf("URL=%q: %v", url, err)) + } + return url2 +} + +func nokogiriIgnoreFailure(htmlBytes []byte, expr string) string { + cmd := exec.Command("nokogiri", "-e", "puts "+expr) + cmd.Stderr = io.Discard + cmd.Stdin = bytes.NewReader(htmlBytes) + outBytes, _ := cmd.Output() + return strings.TrimSpace(string(outBytes)) +} + +func mockRedirect(url string) *http.Response { + resp, err := http.ReadResponse(bufio.NewReader(strings.NewReader(""+ + "HTTP/1.1 302 Found\r\n"+ + "Location: "+url+"\r\n"+ + "\r\n")), nil) + if err != nil { + panic(err) + } + return resp +} + +func mockForbidden() *http.Response { + resp, err := http.ReadResponse(bufio.NewReader(strings.NewReader(""+ + "HTTP/1.1 403 Forbidden\r\n"+ + "\r\n")), nil) + if err != nil { + panic(err) + } + return resp +} + +func modifyResponse(url string, entry httpcache.CacheEntry, resp *http.Response) *http.Response { + switch { + case strings.HasPrefix(url, "https://web.archive.org/"): + htmlBytes, _ := io.ReadAll(resp.Body) + _ = resp.Body.Close() + + // native Wayback Machine redirect + redirect := nokogiriIgnoreFailure(htmlBytes, `$_.css("p.impatient a").first["href"]`) + if strings.HasPrefix(redirect, "https://web.archive.org/web/") { + return mockRedirect(redirect) + } + + // silly TOG SSO + if strings.Contains(url, "sso.opengroup.org") { + if bytes.Contains(htmlBytes, []byte("document.forms.postbinding.submit()")) { + redirect := nokogiriIgnoreFailure(htmlBytes, `$_.css("#postbinding").first["action"]`) + if redirect != "" { + return mockRedirect(redirect) + } + } + if bytes.Contains(htmlBytes, []byte("General Authorization Error")) { + return mockForbidden() + } + } + + // We drained resp.Body, so re-create it. + resp, err := http.ReadResponse(bufio.NewReader(strings.NewReader(string(entry))), nil) + if err != nil { + panic(err) + } + return resp + default: + return resp + } +} + +type mock404 struct { + Msg string +} + +// Is implements the interface for [errors.Is]. +func (e *mock404) Is(target error) bool { + return target == os.ErrNotExist +} + +// Error implements [error]. +func (e *mock404) Error() string { + return e.Msg +} + +func checkRedirect(req *http.Request, via []*http.Request) error { + // net/http.defaultCheckRedirect + if len(via) >= 10 { + return errors.New("stopped after 10 redirects") + } + + // detect redirects that should be 404s + oldURL := via[len(via)-1].URL + newURL := req.URL + if (newURL.Path == "/" || newURL.Path == "") && !(oldURL.Path == "/" || oldURL.Path == "") { + return &mock404{Msg: fmt.Sprintf("should have been a 404: %q redirected to %q", oldURL.String(), newURL.String())} + } + + return nil +} |