Soprano/soprano/scraping.scm

58 lines
2.1 KiB
Scheme

;; Copyright (C) 2024 Skylar Widulski <cobra@vern.cc>
;;
;; This file is part of Soprano
;;
;; Soprano is free software: you can redistribute it and/or modify it under the
;; terms of the GNU Affero General Public License as published by the Free
;; Software Foundation, either version 3 of the License, or (at your option) any
;; later version.
;;
;; This program is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License
;; for more details.
;;
;; You should have received a copy of the GNU Affero General Public License
;; along with this program. If not, see <https://www.gnu.org/licenses/>.
(define-module (soprano scraping)
#:use-module (libxml2)
#:use-module (system foreign)
#:use-module (ice-9 string-fun)
#:use-module (ice-9 binary-ports)
#:export (proxy get-gif get-search))
;; Return the site-local proxy path for URL: the string "/proxy?url="
;; followed by URL.
;; NOTE(review): URL is appended verbatim, with no percent-encoding; confirm
;; that the /proxy handler copes with URLs containing `&', `#' or spaces.
(define (proxy url)
  (let ((endpoint "/proxy?url="))
    (string-append endpoint url)))
;; Query document D for the two values that describe a single GIF page.
;; Returns a two-element list:
;;   1. the result of the `h1/text()' query under the content container;
;;   2. the result of the nested `img/@src' query under the same container.
;; The final argument to `get-xpath-string' is #f for the first query and #t
;; for the second; its meaning is defined by that helper (presumably it
;; selects attribute handling -- TODO confirm).
(define (get-gif d)
  (let* ((base "/html/body/div/div/div[3]/div/div[1]")
         (title-path (string-append base "/h1/text()"))
         (image-path (string-append base "/div[2]/div/div/div/div/img/@src"))
         (title (get-xpath-string title-path d #f))
         (image (get-xpath-string image-path d #t)))
    (list title image)))
;; Collect the entries of a search-results page from document D.
;;
;; Starting at the first `div' under the results container, every sibling
;; reached through `next' is visited.  For each sibling whose `name' is
;; "div", its children (first one from `child1', the rest through `next')
;; are walked, and one two-element list is produced per child:
;;   1. (text (attrs (child FIGURE)))
;;   2. (text (attrs (child (child (child FIGURE)))))
;; NOTE(review): these are presumably the link target and the image source
;; of each result -- confirm against the caller/template.
;;
;; Returns the entries in document order, or '() when nothing matches.
;;
;; The result is accumulated with `cons' + `reverse' rather than by relying
;; on `append!' mutating its first argument (which the language permits but
;; does not guarantee), and no debugging output is written to the current
;; output port.
(define (get-search d)
  (define pref "/html/body/div/div/div[3]/div/div/div/div[4]")

  ;; Push one entry per child of COLUMN onto ACC (newest first) and return
  ;; the extended accumulator.
  (define (collect-column column acc)
    (let walk ((figure (child1 column 0))
               (acc acc))
      (if (null-pointer? figure)
          acc
          (let ((entry (list
                        (text (attrs (child figure)))
                        (text (attrs (child (child (child figure))))))))
            (walk (next figure 0) (cons entry acc))))))

  (let scan ((column (get-xpath-node (string-append pref "/div[1]") d))
             (acc '()))
    (cond
     ;; Ran off the end of the sibling chain: restore document order.
     ((null-pointer? column) (reverse acc))
     ;; Only `div' siblings hold results; anything else is skipped.
     ((equal? (name column) "div")
      (scan (next column 0) (collect-column column acc)))
     (else
      (scan (next column 0) acc)))))