This repository was archived by the owner on Feb 26, 2025. It is now read-only.
-
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy patharticles.go
101 lines (90 loc) · 2.81 KB
/
articles.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
// lwnfeed - A full-text RSS feed generator for LWN.net.
// Copyright (C) 2020-2022 Tulir Asokan
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package main
import (
"errors"
"fmt"
"net/url"
"strconv"
"strings"
"github.com/PuerkitoBio/goquery"
"github.com/gorilla/feeds"
"github.com/mmcdole/gofeed"
log "maunium.net/go/maulogger/v2"
)
// Sentinel errors returned while mapping an input feed item to an article.
var (
	// invalidItemID is returned when the article ID segment of a feed item
	// link cannot be parsed as an integer.
	invalidItemID = errors.New("feed item link has invalid format")

	// unknownLink is returned when a feed item link does not start with the
	// expected LWN.net article URL prefix.
	unknownLink = errors.New("unknown feed item link format")
)
// loadArticleContent downloads the LWN.net article page with the given numeric
// ID and turns it into a feed item.
//
// The title is taken from the ".PageHeadline > h1" element and the content is
// the inner HTML of the ".ArticleText" element. Only the Link, Title, Content
// and Id fields of the returned item are populated.
//
// An error is returned when the URL cannot be parsed, the HTTP request fails,
// the response status is 300 or higher, or the HTML cannot be processed.
func loadArticleContent(id int) (*feeds.Item, error) {
	log.Debugfln("Loading content of article %d from LWN.net", id)

	articleURL, err := url.Parse(fmt.Sprintf("https://lwn.net/Articles/%d/", id))
	if err != nil {
		return nil, fmt.Errorf("failed to parse URL: %w", err)
	}
	href := articleURL.String()

	response, err := client.Get(href)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch article: %w", err)
	}
	defer response.Body.Close()

	// Anything outside the 1xx/2xx range is treated as a failure.
	if status := response.StatusCode; status >= 300 {
		return nil, fmt.Errorf("unexpected response status %d", status)
	}

	document, err := goquery.NewDocumentFromReader(response.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to parse article HTML: %w", err)
	}

	content, err := document.Find(".ArticleText").Html()
	if err != nil {
		return nil, fmt.Errorf("failed to get article body: %w", err)
	}
	headline := document.Find(".PageHeadline > h1").Text()

	log.Infofln("Successfully loaded content of article %d", id)

	item := &feeds.Item{
		Id:      strconv.Itoa(id),
		Title:   headline,
		Content: content,
		Link:    &feeds.Link{Href: href},
	}
	return item, nil
}
// handleInputFeedItem converts an item from the input feed into an output feed
// item that carries the full article content.
//
// The article ID is extracted from input.Link, which must start with
// "https://lwn.net/Articles/"; otherwise unknownLink is returned. If the path
// segment following that prefix is not an integer, invalidItemID is returned.
//
// If an item for the ID is already present in cachedArticles it is returned
// as-is. Otherwise the article is loaded with loadArticleContent, enriched
// with the description, creation time and first author of the input item,
// passed to addToCache and returned. Errors from loadArticleContent are
// returned unchanged.
func handleInputFeedItem(input *gofeed.Item) (*feeds.Item, error) {
	if !strings.HasPrefix(input.Link, "https://lwn.net/Articles/") {
		return nil, unknownLink
	}

	// The prefix check above guarantees that splitting on "/" yields at least
	// five parts ("https:", "", "lwn.net", "Articles", <id>, ...), so index 4
	// is always in range.
	id, err := strconv.Atoi(strings.Split(input.Link, "/")[4])
	if err != nil {
		return nil, invalidItemID
	}

	if cached, ok := cachedArticles[id]; ok {
		log.Debugln("Found article", id, "in cache")
		return cached, nil
	}

	output, err := loadArticleContent(id)
	if err != nil {
		return nil, err
	}

	output.Description = input.Description

	// The parsed timestamps are pointers and are nil when the feed entry has
	// no parseable date. Dereferencing them unconditionally would panic, so
	// prefer the publication time, fall back to the update time, and leave
	// Created at its zero value when neither is available.
	switch {
	case input.PublishedParsed != nil:
		output.Created = *input.PublishedParsed
	case input.UpdatedParsed != nil:
		output.Created = *input.UpdatedParsed
	}

	// Only the first listed author is carried over to the output item.
	if len(input.Authors) > 0 && input.Authors[0] != nil {
		first := input.Authors[0]
		output.Author = &feeds.Author{
			Name:  first.Name,
			Email: first.Email,
		}
	}

	addToCache(id, output)
	return output, nil
}