Skip to content

Commit

Permalink
fetch a story POC
Browse files Browse the repository at this point in the history
  • Loading branch information
rivernews committed Sep 17, 2022
1 parent bda216e commit e29fdc8
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 26 deletions.
2 changes: 1 addition & 1 deletion cloud_module/landing_s3_trigger.tf
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ resource "aws_s3_bucket_notification" "bucket_notification" {
lambda_function_arn = module.landing_parse_metadata_lambda.lambda_function_arn
events = ["s3:ObjectCreated:*"]
filter_prefix = "${local.newssite_economy_alias}/"
filter_suffix = ".html"
filter_suffix = "landing.html"
}

lambda_function {
Expand Down
25 changes: 1 addition & 24 deletions lambda_golang/cmd/landing/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,9 @@ package main
import (
"context"
"fmt"
"io"
"net/http"
"strings"
"time"

"golang.org/x/net/html/charset"

"github.com/aws/aws-lambda-go/lambda"

"github.com/rivernews/GoTools"
Expand All @@ -33,27 +29,8 @@ type LambdaResponse struct {

func HandleRequest(ctx context.Context, name LambdaEvent) (LambdaResponse, error) {
newsSite := newssite.GetNewsSite("NEWSSITE_ECONOMY")
resp, err := http.Get(newsSite.LandingURL)
if err != nil {
// handle error
GoTools.Logger("ERROR", err.Error())
}
defer resp.Body.Close()

contentType := resp.Header.Get("Content-Type") // Optional, better guessing
GoTools.Logger("INFO", "ContentType is ", contentType)
utf8reader, err := charset.NewReader(resp.Body, contentType)
if err != nil {
GoTools.Logger("ERROR", err.Error())
}

body, err := io.ReadAll(utf8reader)
if err != nil {
// handle error
GoTools.Logger("ERROR", err.Error())
}
bodyText := string(body)

bodyText := newssite.Fetch(newsSite.LandingURL)
GoTools.Logger("INFO", "In golang runtime now!\n\n```\n "+bodyText[:500]+"\n ...```\n End of message")

// scraper
Expand Down
12 changes: 12 additions & 0 deletions lambda_golang/cmd/story/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package main

import (
"fmt"
"strings"

"github.com/aws/aws-lambda-go/events"
"github.com/aws/aws-lambda-go/lambda"
Expand Down Expand Up @@ -34,12 +35,23 @@ func HandleRequest(ctx context.Context, S3Event events.S3Event) (LambdaResponse,

GoTools.Logger("INFO", fmt.Sprintf("S3 event ``` %s ```\n ", newssite.AsJson(record)))

metadataS3KeyTokens := strings.Split(record.S3.Object.URLDecodedKey, "/")
newsSiteAlias := metadataS3KeyTokens[0]
landingPageTimeStamp := metadataS3KeyTokens[len(metadataS3KeyTokens)-2]

metadataJSONString := cloud.Pull(record.S3.Object.URLDecodedKey)
var metadata newssite.LandingPageMetadata
newssite.FromJson([]byte(metadataJSONString), &metadata)

GoTools.Logger("INFO", fmt.Sprintf("Test first story: %d:%d", len(metadata.Stories), len(metadata.UntitledStories)))

story := metadata.Stories[0]
storyHtmlBodyText := newssite.Fetch(story.URL)
cloud.Archive(cloud.ArchiveArgs{
BodyText: storyHtmlBodyText,
Key: fmt.Sprintf("%s/stories/%s-%s/story.html", newsSiteAlias, landingPageTimeStamp, story.Name),
})

/*
storyChunk := message.Body
GoTools.Logger("INFO", fmt.Sprintf("Story consumer! story chunk: %s", storyChunk))
Expand Down
2 changes: 1 addition & 1 deletion lambda_golang/pkg/newssite/economy.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ func GetStoriesFromEconomy(body string) LandingPageMetadata {
var emptyTitleURLs strings.Builder
doc.Find("a[href$=html]").Each(func(i int, anchor *goquery.Selection) {
topic := Topic{
Name: strings.TrimSpace(anchor.Text()),
Name: strings.ReplaceAll(strings.TrimSpace(anchor.Text()), "/", "-"),
Description: "",
URL: strings.TrimSpace(anchor.AttrOr("href", "-")),
}
Expand Down
27 changes: 27 additions & 0 deletions lambda_golang/pkg/newssite/utilities.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,12 @@ package newssite

import (
"encoding/json"
"io"
"net/http"
"strings"

"golang.org/x/net/html/charset"

"github.com/rivernews/GoTools"
)

Expand Down Expand Up @@ -40,3 +44,26 @@ func FromJson(b []byte, structInstance any) {
GoTools.Logger("ERROR", err.Error())
}
}

// Fetch issues an HTTP GET for url and returns the response body as a string.
//
// The body is read through charset.NewReader, using the response's
// Content-Type header as a hint, so that the returned text is UTF-8.
//
// Fetch never panics on a failed request: if the GET fails, or the charset
// reader cannot be created, the error is logged and "" is returned. If the
// body read fails part-way, the error is logged and whatever was read so far
// is returned.
func Fetch(url string) string {
	resp, err := http.Get(url)
	if err != nil {
		// resp is nil when Get returns an error, so touching resp.Body below
		// would panic; log and bail out instead.
		GoTools.Logger("ERROR", err.Error())
		return ""
	}
	defer resp.Body.Close()

	contentType := resp.Header.Get("Content-Type") // Optional, better guessing
	GoTools.Logger("DEBUG", "ContentType is ", contentType)
	utf8reader, err := charset.NewReader(resp.Body, contentType)
	if err != nil {
		// NewReader returns a nil reader on error; passing it to io.ReadAll
		// would panic, so stop here.
		GoTools.Logger("ERROR", err.Error())
		return ""
	}

	body, err := io.ReadAll(utf8reader)
	if err != nil {
		// Keep the partial body: io.ReadAll returns the data read before the error.
		GoTools.Logger("ERROR", err.Error())
	}
	return string(body)
}

0 comments on commit e29fdc8

Please sign in to comment.