rural-dict/main.py

89 lines
3.5 KiB
Python
Raw Normal View History

#!/usr/bin/env python
import html
import json
import re
import sys
from datetime import datetime, timezone
from urllib.parse import quote, urlsplit

import requests
from bs4 import BeautifulSoup
from flask import Flask, render_template, request, redirect
# Upstream Urban Dictionary endpoints. Query values are appended directly
# to DEFINE and AUTHOR, which is why they end in "term=" / "author=".
HOME = "https://urbandictionary.com/"
DEFINE = f"{HOME}define.php?term="
AUTHOR = f"{HOME}author.php?author="
RANDOM = f"{HOME}random.php"

# Message text for a failed connection to Urban Dictionary. The spelling of
# the constant's name is kept as-is so existing references keep working.
ERRROR_MESSAGE = "Uh oh, could not connect to urban dictionary api"
def _path_and_query(url):
    """Return the path of *url* followed by ``?query`` when it has a query."""
    parts = urlsplit(url)
    path = parts.path or "/"
    return f"{path}?{parts.query}" if parts.query else path


def _inner_html(tag):
    """Return the inner HTML of *tag*, or an empty string when it is missing."""
    return "" if tag is None else tag.decode_contents()


def scrape(url, arg=None):
    """Fetch an Urban Dictionary page and extract the definitions on it.

    Must be called while a Flask request is active, because the URL of the
    incoming request is compared with the URL the upstream fetch ended on.

    Args:
        url: Upstream URL to fetch.
        arg: Optional string appended verbatim to ``url`` before fetching.

    Returns:
        One of three shapes, which callers tell apart by type:

        * ``"REDIRECT <path>"`` when the upstream response ended on a
          different path/query than the one the current request asked for.
        * ``(definitions, pages)`` on success. Each definition is
          ``[defid, word, meaning_html, example_html, contributor_tag]``
          (``contributor_tag`` may be ``None``); ``pages`` is the pagination
          element, or ``""`` when the page has none.
        * An error message string when upstream cannot be reached or
          answers with a status other than 200.
    """
    target = url if arg is None else f"{url}{arg}"
    try:
        # A timeout keeps a stalled upstream from blocking the worker forever.
        data = requests.get(target, timeout=10)
    except requests.RequestException:
        return ERRROR_MESSAGE
    if data.status_code != 200:
        return f"Couldn't get data from the API\n{data.status_code}"

    # Compare path + query only: scheme and host always differ between this
    # app and upstream. Parsing the URL (instead of a greedy regex) keeps a
    # "/" inside the query string from truncating the result.
    our_path = _path_and_query(request.url)
    path = _path_and_query(data.url)
    if our_path != path:
        return f"REDIRECT {path}"

    soup = BeautifulSoup(data.text, "html.parser")
    ret = []
    for div in soup.find_all("div"):
        defid = div.get('data-defid')
        if defid is None:
            continue
        heading = div.select_one("div div h1 a, div div h2 a")
        if heading is None:
            # Not a complete definition card; skip it rather than crash.
            continue
        meaning = _inner_html(div.find(attrs={"class": ["break-words meaning mb-4"]}))
        example = _inner_html(div.find(attrs={"class": ["break-words example italic mb-4"]}))
        contributor = div.find(attrs={"class": ["contributor font-bold"]})
        ret.append([defid, heading.text, meaning, example, contributor])

    pages = soup.find(attrs={"class": ["pagination text-xl text-center"]})
    if pages is None:
        pages = ""
    return (ret, pages)
def render(data):
    """Render ``index.html`` with *data* passed to the template as ``data``."""
    page = render_template('index.html', data=data)
    return page
# The Flask application; templates are loaded from "templates" and static
# files are served from "static".
app = Flask(
    __name__,
    template_folder="templates",
    static_folder="static",
)
@app.route('/')
def home():
    """Serve the front page, scraped from the Urban Dictionary home page.

    An optional ``page`` query argument is forwarded upstream for pagination.
    """
    page = request.args.get('page')
    # Percent-encode the value so characters such as "&" or "#" cannot
    # change the structure of the upstream URL.
    query = "" if page is None else f"?page={quote(page, safe='')}"
    scraped = scrape(HOME + query)
    if isinstance(scraped, str) and scraped.startswith("REDIRECT"):
        # scrape() signals an upstream redirect as "REDIRECT <path>".
        return redirect(scraped.replace("REDIRECT ", "", 1), 302)
    return render(scraped)
@app.route('/define.php')
def define():
    """Serve the definitions of the word given in the ``term`` query argument.

    Redirects to ``/`` when ``term`` is absent. An optional ``page`` query
    argument is forwarded upstream for pagination.
    """
    term = request.args.get('term')
    if term is None:
        return redirect("/", 302)

    # Flask hands back decoded values, so they are percent-encoded again
    # before being spliced into the upstream URL. Otherwise a term such as
    # "C#" or "a&b" would be cut short or would inject extra parameters.
    query = quote(term, safe="")
    page = request.args.get('page')
    if page is not None:
        query += f"&page={quote(page, safe='')}"

    scraped = scrape(DEFINE + query)
    if isinstance(scraped, str) and scraped.startswith("REDIRECT"):
        # scrape() signals an upstream redirect as "REDIRECT <path>".
        return redirect(scraped.replace("REDIRECT ", "", 1), 302)
    return render(scraped)
@app.route('/author.php')
def author():
    """Serve the definitions written by the ``author`` query argument.

    Redirects to ``/`` when ``author`` is absent. An optional ``page`` query
    argument is forwarded upstream for pagination.
    """
    name = request.args.get('author')
    if name is None:
        return redirect("/", 302)

    # Flask hands back decoded values, so they are percent-encoded again
    # before being spliced into the upstream URL.
    query = quote(name, safe="")
    page = request.args.get('page')
    if page is not None:
        query += f"&page={quote(page, safe='')}"

    scraped = scrape(AUTHOR + query)
    if isinstance(scraped, str):
        if scraped.startswith("REDIRECT"):
            # scrape() signals an upstream redirect as "REDIRECT <path>".
            return redirect(scraped.replace("REDIRECT ", "", 1), 302)
        # Any other string is an error message. Render it unchanged instead
        # of indexing into it as if it were a (definitions, pages) tuple.
        return render(scraped)

    definitions, pages = scraped
    # Drop the single angle quotes (› and ‹) from the pagination markup;
    # the double ones (» and «) are kept.
    pages = str(pages).replace("›", "").replace("‹", "")
    return render((definitions, pages))
@app.route('/random.php')
def random():
    """Serve a random word by scraping Urban Dictionary's random endpoint."""
    scraped = scrape(RANDOM)
    if isinstance(scraped, str) and scraped.startswith("REDIRECT"):
        # scrape() signals an upstream redirect as "REDIRECT <path>";
        # strip only the leading marker and send the client to that path.
        return redirect(scraped.replace("REDIRECT ", "", 1), 302)
    return render(scraped)
if __name__ == '__main__':
    # Executed directly as a script: start the Flask server on port 8000.
    app.run(port=8000)