2021-11-22 16:51:03 +00:00
|
|
|
package wikipedia
|
|
|
|
|
|
|
|
import (
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
"strings"
|
2021-11-22 17:08:34 +00:00
|
|
|
. "wikitil/internal/config"
|
2021-11-22 16:51:03 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// retries is the number of failed attempts Get tolerates before giving up.
// Pages skipped because of an illegal description do not count against it.
const retries = 5
|
|
|
|
|
|
|
|
// illegalDescriptionParts lists the substrings that disqualify a page: Get
// skips any page whose description contains at least one of them.
var illegalDescriptionParts = []string{
	// English descriptions.
	"disambiguation page", "Wikimedia list article",
	// German descriptions.
	"Begriffsklärungsseite", "Wikimedia-Liste",
}
|
|
|
|
|
2021-11-22 17:08:34 +00:00
|
|
|
func Get(config Config) (PageInfo, error){
|
2021-11-22 16:51:03 +00:00
|
|
|
retryLoop:
|
|
|
|
for i := 0; i < retries; i++ {
|
2021-11-22 17:08:34 +00:00
|
|
|
id, err := queryRandom(config.BaseUrl)
|
2021-11-22 16:51:03 +00:00
|
|
|
if err != nil {
|
|
|
|
return PageInfo{}, err
|
|
|
|
}
|
|
|
|
|
2021-11-22 17:08:34 +00:00
|
|
|
info, err := queryInfo(config.BaseUrl, id)
|
2021-11-22 16:51:03 +00:00
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
fmt.Println(err)
|
|
|
|
fmt.Println("Retrying...")
|
|
|
|
fmt.Println()
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, part := range illegalDescriptionParts {
|
|
|
|
if strings.Contains(info.Description, part) {
|
|
|
|
fmt.Println("illegal description: " + info.Description)
|
|
|
|
i-- // illegal descriptions don't count towards retry limit
|
|
|
|
continue retryLoop
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return info, err
|
|
|
|
}
|
|
|
|
return PageInfo{}, errors.New("retries exceeded")
|
|
|
|
}
|
|
|
|
|
|
|
|
func Format(info PageInfo) string {
|
|
|
|
var builder strings.Builder
|
|
|
|
builder.WriteString(info.Title)
|
|
|
|
builder.WriteString(":\n")
|
|
|
|
builder.WriteString(info.Description)
|
|
|
|
builder.WriteString("\n\n")
|
|
|
|
builder.WriteString(info.URL)
|
|
|
|
return builder.String()
|
|
|
|
}
|