tangled
alpha
login
or
join now
edavis.dev
/
bsky-feeds
1
fork
atom
this repo has no description
1
fork
atom
overview
issues
pulls
pipelines
feat: add videostream handler
Eric Davis
1 year ago
14bd694b
80c57926
+238
-1
7 changed files
expand all
collapse all
unified
split
Makefile
cmd
videostream
main.go
pkg
videostream
checkpoint.go
generator.go
handler.go
queue.go
schema.sql
+4
-1
Makefile
···
1
1
-
all: bin/mostliked bin/feedweb
1
1
+
all: bin/videostream bin/mostliked bin/feedweb
2
2
+
3
3
+
bin/videostream: cmd/videostream/*.go pkg/videostream/*.go
4
4
+
go build -o $@ ./cmd/videostream
2
5
3
6
bin/mostliked: cmd/mostliked/main.go pkg/mostliked/handler.go db/mostliked/*.go pkg/feeds/*.go
4
7
go build -o $@ ./cmd/mostliked
+67
cmd/videostream/main.go
···
1
1
+
package main
2
2
+
3
3
+
import (
4
4
+
"context"
5
5
+
"database/sql"
6
6
+
"log"
7
7
+
"os"
8
8
+
"os/signal"
9
9
+
"syscall"
10
10
+
11
11
+
jetstream "github.com/bluesky-social/jetstream/pkg/models"
12
12
+
"github.com/edavis/bsky-feeds/pkg/videostream"
13
13
+
"github.com/gorilla/websocket"
14
14
+
_ "github.com/mattn/go-sqlite3"
15
15
+
)
16
16
+
17
17
+
// JetstreamUrl is the Jetstream websocket endpoint dialed by main. The
// wantedCollections query parameter requests only app.bsky.feed.post events.
const JetstreamUrl = `wss://jetstream2.us-west.bsky.network/subscribe?wantedCollections=app.bsky.feed.post`
18
18
+
19
19
+
func main() {
20
20
+
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGINT, syscall.SIGTERM)
21
21
+
defer stop()
22
22
+
23
23
+
conn, _, err := websocket.DefaultDialer.DialContext(ctx, JetstreamUrl, nil)
24
24
+
if err != nil {
25
25
+
log.Fatalf("failed to open websocket: %v\n", err)
26
26
+
}
27
27
+
defer func() {
28
28
+
if err := conn.Close(); err != nil {
29
29
+
log.Printf("failed to close websocket: %v\n", err)
30
30
+
}
31
31
+
log.Printf("websocket closed\n")
32
32
+
}()
33
33
+
34
34
+
dbCnx, err := sql.Open("sqlite3", "data/videostream.db?_journal=WAL&_fk=on&_timeout=5000&_sync=1&_txlock=immediate")
35
35
+
if err != nil {
36
36
+
log.Fatalf("failed to open database: %v\n", err)
37
37
+
}
38
38
+
defer func() {
39
39
+
if _, err := dbCnx.Exec("PRAGMA wal_checkpoint(TRUNCATE)"); err != nil {
40
40
+
log.Printf("error doing final WAL checkpoint: %v\n", err)
41
41
+
}
42
42
+
if err := dbCnx.Close(); err != nil {
43
43
+
log.Printf("failed to close db: %v\n", err)
44
44
+
}
45
45
+
log.Printf("db closed\n")
46
46
+
}()
47
47
+
48
48
+
queue := videostream.NewQueue(1000)
49
49
+
go videostream.Handler(ctx, queue, dbCnx)
50
50
+
51
51
+
log.Printf("starting up\n")
52
52
+
go func() {
53
53
+
for {
54
54
+
var event jetstream.Event
55
55
+
err := conn.ReadJSON(&event)
56
56
+
if err != nil {
57
57
+
log.Printf("ReadJSON error: %v\n", err)
58
58
+
stop()
59
59
+
break
60
60
+
}
61
61
+
queue.Enqueue(event)
62
62
+
}
63
63
+
}()
64
64
+
65
65
+
<-ctx.Done()
66
66
+
log.Printf("shutting down\n")
67
67
+
}
+7
pkg/videostream/checkpoint.go
···
1
1
+
package videostream
2
2
+
3
3
+
// CheckpointResults holds the three integer columns of the single row
// returned by `PRAGMA wal_checkpoint(...)`, in the order Handler scans them.
type CheckpointResults struct {
	// Blocked is the first result column; Handler treats a value of 1 as
	// "checkpoint was blocked".
	Blocked int
	// Pages is the second result column.
	// NOTE(review): presumably the number of pages in the WAL — confirm
	// against the SQLite pragma documentation.
	Pages int
	// Transferred is the third result column; Handler compares it with Pages
	// to tell a complete checkpoint from a partial one.
	Transferred int
}
+1
pkg/videostream/generator.go
···
1
1
+
package videostream
+107
pkg/videostream/handler.go
···
1
1
+
package videostream
2
2
+
3
3
+
import (
4
4
+
"context"
5
5
+
"database/sql"
6
6
+
_ "embed"
7
7
+
"encoding/json"
8
8
+
"fmt"
9
9
+
"log"
10
10
+
"time"
11
11
+
12
12
+
appbsky "github.com/bluesky-social/indigo/api/bsky"
13
13
+
jetstream "github.com/bluesky-social/jetstream/pkg/models"
14
14
+
"github.com/edavis/bsky-feeds/pkg/feeds"
15
15
+
_ "github.com/mattn/go-sqlite3"
16
16
+
)
17
17
+
18
18
+
//go:embed schema.sql
19
19
+
var ddl string
20
20
+
21
21
+
func Handler(ctx context.Context, events *Queue, dbCnx *sql.DB) {
22
22
+
var (
23
23
+
dbTx *sql.Tx
24
24
+
err error
25
25
+
eventCount int
26
26
+
)
27
27
+
28
28
+
if _, err = dbCnx.ExecContext(ctx, ddl); err != nil {
29
29
+
log.Printf("could not create tables: %v\n", err)
30
30
+
}
31
31
+
if _, err = dbCnx.ExecContext(ctx, `PRAGMA wal_autocheckpoint = 0`); err != nil {
32
32
+
log.Printf("could not set PRAGMA wal_autocheckpoint: %v\n", err)
33
33
+
}
34
34
+
35
35
+
for {
36
36
+
select {
37
37
+
case <-ctx.Done():
38
38
+
return
39
39
+
default:
40
40
+
}
41
41
+
42
42
+
event, ok := events.Dequeue()
43
43
+
if !ok {
44
44
+
time.Sleep(100 * time.Millisecond)
45
45
+
continue
46
46
+
}
47
47
+
48
48
+
if dbTx == nil {
49
49
+
dbTx, err = dbCnx.BeginTx(ctx, nil)
50
50
+
if err != nil {
51
51
+
log.Printf("failed to begin transaction: %v\n", err)
52
52
+
}
53
53
+
}
54
54
+
55
55
+
if event.Kind != jetstream.EventKindCommit {
56
56
+
continue
57
57
+
}
58
58
+
59
59
+
if event.Commit.Operation != jetstream.CommitOperationCreate {
60
60
+
continue
61
61
+
}
62
62
+
63
63
+
commit := *event.Commit
64
64
+
var post appbsky.FeedPost
65
65
+
if err = json.Unmarshal(commit.Record, &post); err != nil {
66
66
+
log.Printf("error parsing commit.Record: %v\n", err)
67
67
+
continue
68
68
+
}
69
69
+
70
70
+
if post.Embed != nil && post.Embed.EmbedVideo != nil {
71
71
+
uri := fmt.Sprintf("at://%s/%s/%s", event.Did, commit.Collection, commit.RKey)
72
72
+
ts := feeds.SafeTimestamp(post.CreatedAt)
73
73
+
dbTx.ExecContext(ctx, `insert or ignore into posts (uri, create_ts) values (?, ?)`, uri, ts)
74
74
+
} else {
75
75
+
continue
76
76
+
}
77
77
+
78
78
+
eventCount += 1
79
79
+
if eventCount%25 == 0 {
80
80
+
// TODO trim
81
81
+
82
82
+
if err = dbTx.Commit(); err != nil {
83
83
+
log.Printf("commit failed: %v\n", err)
84
84
+
}
85
85
+
86
86
+
var results CheckpointResults
87
87
+
err = dbCnx.QueryRowContext(ctx, `PRAGMA wal_checkpoint(RESTART)`).Scan(&results.Blocked, &results.Pages, &results.Transferred)
88
88
+
switch {
89
89
+
case err != nil:
90
90
+
log.Printf("failed checkpoint: %v\n", err)
91
91
+
case results.Blocked == 1:
92
92
+
log.Printf("checkpoint: blocked\n")
93
93
+
case results.Pages == results.Transferred:
94
94
+
log.Printf("checkpoint: %d pages transferred\n", results.Transferred)
95
95
+
case results.Pages != results.Transferred:
96
96
+
log.Printf("checkpoint: %d pages, %d transferred\n", results.Pages, results.Transferred)
97
97
+
}
98
98
+
99
99
+
dbTx, err = dbCnx.BeginTx(ctx, nil)
100
100
+
if err != nil {
101
101
+
log.Printf("failed to begin transaction: %v\n", err)
102
102
+
}
103
103
+
104
104
+
log.Printf("queue size: %d\n", events.Size())
105
105
+
}
106
106
+
}
107
107
+
}
+46
pkg/videostream/queue.go
···
1
1
+
package videostream
2
2
+
3
3
+
import (
4
4
+
"sync"
5
5
+
6
6
+
jetstream "github.com/bluesky-social/jetstream/pkg/models"
7
7
+
)
8
8
+
9
9
+
type Queue struct {
10
10
+
lk sync.Mutex
11
11
+
events []jetstream.Event
12
12
+
}
13
13
+
14
14
+
func NewQueue(capacity int) *Queue {
15
15
+
return &Queue{
16
16
+
events: make([]jetstream.Event, 0, capacity),
17
17
+
}
18
18
+
}
19
19
+
20
20
+
func (q *Queue) Enqueue(event jetstream.Event) {
21
21
+
q.lk.Lock()
22
22
+
defer q.lk.Unlock()
23
23
+
24
24
+
q.events = append(q.events, event)
25
25
+
}
26
26
+
27
27
+
func (q *Queue) Dequeue() (jetstream.Event, bool) {
28
28
+
q.lk.Lock()
29
29
+
defer q.lk.Unlock()
30
30
+
31
31
+
if len(q.events) == 0 {
32
32
+
var e jetstream.Event
33
33
+
return e, false
34
34
+
}
35
35
+
36
36
+
event := q.events[0]
37
37
+
q.events = q.events[1:]
38
38
+
return event, true
39
39
+
}
40
40
+
41
41
+
func (q *Queue) Size() int {
42
42
+
q.lk.Lock()
43
43
+
defer q.lk.Unlock()
44
44
+
45
45
+
return len(q.events)
46
46
+
}
+6
pkg/videostream/schema.sql
···
1
1
+
-- posts holds one row per recorded post, keyed by its at:// URI.
-- create_ts is the post's creation timestamp as produced by the handler
-- (NOTE(review): unit/epoch not visible here; confirm against feeds.SafeTimestamp).
CREATE TABLE IF NOT EXISTS posts (
    uri TEXT PRIMARY KEY,
    create_ts INT NOT NULL
);

-- ts_idx indexes posts by creation timestamp.
CREATE INDEX IF NOT EXISTS ts_idx ON posts(create_ts);