UNfunny/unfunny/scraping.scm

54 lines
1.9 KiB
Scheme

;; Copyright (C) 2024 Skylar Widulski <cobra@vern.cc>
;;
;; This file is part of UNfunny
;;
;; UNfunny is free software: you can redistribute it and/or modify it under the
;; terms of the GNU Affero General Public License as published by the Free
;; Software Foundation, either version 3 of the License, or (at your option) any
;; later version.
;;
;; This program is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License
;; for more details.
;;
;; You should have received a copy of the GNU Affero General Public License
;; along with this program. If not, see <https://www.gnu.org/licenses/>.
(define-module (unfunny scraping)
#:use-module (unfunny wand)
#:use-module (libxml2)
#:use-module (system foreign)
#:use-module (ice-9 string-fun)
#:use-module (ice-9 binary-ports)
#:export (proxy crop get-picture))
(define (proxy url)
(string-append "/proxy?url=" url))
(define (crop image name)
(if (not (file-exists? "/tmp/unfunny"))
(mkdir "/tmp/unfunny"))
(define wand (new-magick-wand))
(magick-read-image-blob wand (bytevector->pointer image) (array-length image))
(magick-crop-image wand
(magick-get-image-width wand)
(- (magick-get-image-height wand) 20)
0 0)
(magick-write-image
wand
(string->pointer (string-append "/tmp/unfunny/" name)))
(set! wand (destroy-magick-wand wand))
(define data
(call-with-input-file (string-append
"/tmp/unfunny/" name)
get-bytevector-all))
(delete-file (string-append "/tmp/unfunny/" name))
data)
(define (get-picture d)
(let ((pref "/html/body/div/div[2]/div[2]/div/div[1]/div[1]/img/@"))
(list
(get-xpath-string (string-append pref "src") d #t)
(get-xpath-string (string-append pref "alt") d #t))))