WeatherNews.jl/src/db.jl

213 lines
5.7 KiB
Julia

using SQLite
using DataFrames
using Dates
using TimeZones
using WeatherNews.Helpers
"""
sql_fn(db)
Return a function that takes an SQL and optional bind parameters
and runs the query and returns the results as a DataFrame.
```julia
db = SQLite.DB("sql/wn.db")
sql = WeatherNews.DB.sql_fn(db)
res = sql("SELECT * FROM cancellation")
```
"""
function sql_fn(db)
function sql(query, params=[])
DBInterface.execute(db, query, params) |> DataFrame
end
end
"""
Find or create a caster by short name `n`.
"""
function find_or_create_caster(db, n)
sql_select = "SELECT * FROM caster WHERE n = ? LIMIT 1"
df = DBInterface.execute(db, sql_select, [n]) |> DataFrame
if df.id != Union{Missing,Int64}[]
return df
else
DBInterface.execute(db, "INSERT INTO caster (n) VALUES (?)", [n])
df = DBInterface.execute(db, sql_select, [n]) |> DataFrame
return df
end
end
"""
Find all casters
"""
function find_all_casters(db)
sql_select = "SELECT * FROM CASTER"
return DBInterface.execute(db, sql_select) |> DataFrame
end
function find_recent_WNL_schedule(db)
# will return most recent 48 hours of schedule entries at least, including upcoming and past days'
# limit to 28 assumes worst case scenario that all recent segments are tag-teamed
sql_select = "SELECT id, jst FROM schedule WHERE segment_id IS NOT 8 ORDER BY jst DESC limit 28;"
s = DBInterface.execute(db, sql_select) |> DataFrame
s.jst = jst2zdt.(s.jst)
s
end
"""
Find a segment by short name `n`.
"""
function find_segment(db, n)
return DBInterface.execute(db, "SELECT * FROM segment WHERE n = ? LIMIT 1", [n]) |> DataFrame
end
"""
Insert a new `row` into schedule if possible.
If a row with the same `jst` value already exists, it will silently fail on purpose.
"""
function insert_schedule(db, row)
# find caster
caster_id = if row[:caster] == ""
missing
else
caster = find_or_create_caster(db, row[:caster])
caster.id[1]
end
# find segment
segment = find_segment(db, row[:title])
# insert row
try
sql_insert = "INSERT INTO schedule (caster_id, segment_id, jst) VALUES (?, ?, ?)"
timestamp = Dates.format(row[:t], "Y-mm-ddTHH:MM:SSz")
#@info "before insert" caster_id, segment.id[1] timestamp
ri = DBInterface.execute(db, sql_insert, [caster_id, segment.id[1], timestamp])
return ri
catch e
@debug e
return false
end
end
"""
Return a row from the schedule based on its `jst` time.
"""
function find_schedule_by_jst(db, jst::String)
df = DBInterface.execute(db,
"SELECT * FROM schedule WHERE jst = ? AND segment_id IS NOT 8 LIMIT 1",
[jst]) |> DataFrame
if size(df, 1) == 0
return missing
else
return eachrow(df)[1]
end
end
"Update a schedule row with its `video_id` and `view_count`."
function update_schedule_with_video(db, video_id::String, view_count::Union{Int,Missing}, id::Int)
try
sql_update = "UPDATE schedule SET video_id = ?, view_count = ? WHERE id = ?;"
DBInterface.execute(db, sql_update, [video_id, view_count, id])
catch e
@debug e
return e
end
end
"Update a schedule row with its `view_count`."
function update_schedule_with_viewcount(db, view_count::Int, id::Int)
try
sql_update = "UPDATE schedule SET view_count = ? WHERE id = ?;"
DBInterface.execute(db, sql_update, [view_count, id])
catch e
@debug e
return e
end
end
"""
Cancel a schedule, and replace the entry with a new caster (or none at all).
If there's no new caster to fill in, pass in `missing` as the `new_caster_id`.
"""
function cancel_schedule(db, schedule_id, new_caster_id)
sql_insert = "INSERT INTO cancellation SELECT id, caster_id, segment_id, jst FROM schedule WHERE id = ?"
sql_update = "UPDATE schedule SET caster_id = ? WHERE id = ?"
try
return DBInterface.transaction(db) do
DBInterface.execute(db, sql_insert, [schedule_id])
DBInterface.execute(db, sql_update, [new_caster_id, schedule_id])
end
catch e
@debug e
return e
end
end
"""
Load the entire schedule into a DataFrame
"""
function load_schedule_joined(db)
sql_select = """
SELECT s.id AS id,
seg.n AS segment,
c.n AS caster,
s.video_id AS video_id,
s.view_count AS view_count,
m.val AS mscale,
s.jst AS jst
FROM schedule s
JOIN segment seg ON seg.id = s.segment_id
LEFT JOIN caster c ON c.id = s.caster_id
LEFT JOIN mscale m ON m.jst = s.jst
ORDER BY jst
"""
s = DBInterface.execute(db, sql_select) |> DataFrame
s.jst = jst2zdt.(s.jst)
return s
end
#=
using WeatherNews
using WeatherNews: API, DB
using DataFrames
using DataFramesMeta
using SQLite
using Statistics
db = SQLite.DB("sql/wn.db")
s = DB.load_schedule_joined(db)
# Try finding the median instead of the average.
rs = @chain s begin
@subset(:segment .!= "au PAY")
dropmissing(:view_count)
dropmissing(:caster)
@subset(:view_count .!= 0)
@select(:caster, :view_count)
@groupby(:caster)
@combine begin
:median_views = median(:view_count)
:average_views = mean(:view_count)
end
sort(:median_views, rev=true)
end
# Summarize views by day
tf = Day(1)
rs = @chain s begin
dropmissing(:view_count)
@subset(:view_count .!= 0)
@subset(hour.(:jst) .!= 23)
@transform(:jst2 = floor.(:jst, tf))
@groupby(:jst2)
@combine begin
:views = sum(:view_count)
end
@subset(:views .> 100000)
@select(:jst = :jst2, :views)
end
=#