58 lines
2.1 KiB
Scheme
58 lines
2.1 KiB
Scheme
;; Copyright (C) 2024 Skylar Widulski <cobra@vern.cc>
|
|
;;
|
|
;; This file is part of Soprano
|
|
;;
|
|
;; Soprano is free software: you can redistribute it and/or modify it under the
|
|
;; terms of the GNU Affero General Public License as published by the Free
|
|
;; Software Foundation, either version 3 of the License, or (at your option) any
|
|
;; later version.
|
|
;;
|
|
;; This program is distributed in the hope that it will be useful, but WITHOUT
|
|
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License
|
|
;; for more details.
|
|
;;
|
|
;; You should have received a copy of the GNU Affero General Public License
|
|
;; along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
(define-module (soprano scraping)
|
|
#:use-module (libxml2)
|
|
#:use-module (system foreign)
|
|
#:use-module (ice-9 string-fun)
|
|
#:use-module (ice-9 binary-ports)
|
|
#:export (proxy get-gif get-search))
|
|
|
|
;; Return the local proxy path for URL: the literal prefix "/proxy?url="
;; followed by URL exactly as given (no escaping is applied here).
(define (proxy url)
  (let ((endpoint "/proxy?url="))
    (string-concatenate (list endpoint url))))
|
|
|
|
;; Look up two values in document D, both relative to the same XPath
;; prefix, and return them as a two-element list:
;;   1. the result for PREF/h1/text()                       (flag #f)
;;   2. the result for PREF/div[1]/div/div/div/div/img/@src (flag #t)
;; NOTE(review): the meaning of the trailing boolean is defined by
;; `get-xpath-string', which is not visible here -- confirm before
;; changing it.
(define (get-gif d)
  (define pref "/html/body/div/div/div[3]/div/div[1]")
  ;; Absolute XPath for a location below PREF.
  (define (under suffix)
    (string-append pref suffix))
  (let* ((heading (get-xpath-string (under "/h1/text()") d #f))
         (source (get-xpath-string
                  (under "/div[1]/div/div/div/div/img/@src")
                  d #t)))
    (list heading source)))
|
|
|
|
;; Collect search-result entries from document D.
;;
;; Starts at the node matched by PREF + "/div[1]" and advances with
;; `next' until a null pointer is reached.  For every visited node whose
;; `name' is "div", its children are walked the same way, starting from
;; `(child1 node 0)'.  Each child contributes one two-element list:
;;   (text (attrs (child figure)))
;;   (text (attrs (child (child (child figure)))))
;; NOTE(review): presumably these are the result's link target and its
;; image source -- confirm against the scraped markup.
;;
;; Returns the entries in the order they were visited, or '() when no
;; entry was found.  Nothing is written to the current output port.
(define (get-search d)
  (define pref "/html/body/div/div/div[3]/div/div/div/div[4]")

  ;; Build the two-element entry for one FIGURE node.
  (define (figure->entry figure)
    (list (text (attrs (child figure)))
          (text (attrs (child (child (child figure)))))))

  ;; Entries are consed onto ACC (newest first) and reversed once at the
  ;; end, so each insertion is constant-time.
  (let next-column ((column (get-xpath-node (string-append pref "/div[1]") d))
                    (acc '()))
    (cond
     ((null-pointer? column)
      (reverse! acc))
     ((equal? (name column) "div")
      (let next-figure ((figure (child1 column 0))
                        (acc acc))
        (if (null-pointer? figure)
            (next-column (next column 0) acc)
            ;; Read the entry before stepping to the following node.
            (let ((entry (figure->entry figure)))
              (next-figure (next figure 0) (cons entry acc))))))
     (else
      (next-column (next column 0) acc)))))
|