Skip to content

Commit

Permalink
Ready to test (#36)
Browse files Browse the repository at this point in the history
* Ready to test

* Fix db field first char not lowercase
Tracked by #25 (comment)

* Fix permission of db index, S3 pull
Tracked by #25 (comment)

* All tests complete
Tracked by #25 (comment)
  • Loading branch information
rivernews authored Sep 29, 2022
1 parent 2c8f841 commit 4ec0693
Show file tree
Hide file tree
Showing 24 changed files with 444 additions and 369 deletions.
11 changes: 5 additions & 6 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
**credential**
**/builds/**

lambda_golang/landing
lambda_golang/landing_s3_trigger
lambda_golang/landing_metadata_cronjob
lambda_golang/stories
lambda_golang/story
lambda_golang/stories_finalizer
lambda_golang/*
!lambda_golang/go.mod
!lambda_golang/go.sum
!lambda_golang/*/
!lambda_golang/*/**
venv

# Binaries for programs and plugins
Expand Down
1 change: 0 additions & 1 deletion cloud_environments/terraform.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ set +o allexport
if (
cd $GOLANG_SRC_DIR && \
go build ./cmd/landing && \
go build ./cmd/landing_s3_trigger && \
go build ./cmd/landing_metadata_cronjob && \
go build ./cmd/stories && \
go build ./cmd/story && \
Expand Down
17 changes: 15 additions & 2 deletions cloud_module/dynamodb/table.tf
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ resource "aws_ssm_parameter" "media_table" {

// https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/dynamodb_table#attributes-reference
resource "aws_dynamodb_table" "media_table" {
name = "Mediatable"
name = "${title(replace("${var.project_alias}_${var.environment_name}", "-", "_"))}"
billing_mode = "PROVISIONED"
read_capacity = 20
write_capacity = 20
Expand All @@ -23,8 +23,12 @@ resource "aws_dynamodb_table" "media_table" {
type = "S"
}

attribute {
name = "s3Key"
type = "S"
}

// other fields
// S3 key
// docType = {landing | story | landingMetadata | ...}
// events

Expand Down Expand Up @@ -58,6 +62,15 @@ resource "aws_dynamodb_table" "media_table" {
non_key_attributes = ["s3Key"]
}

global_secondary_index {
name = "s3KeyIndex"
hash_key = "s3Key"
range_key = "createdAt"
write_capacity = 10
read_capacity = 10
projection_type = "KEYS_ONLY"
}

tags = {
Project = local.project_name
Environment = var.environment_name
Expand Down
2 changes: 1 addition & 1 deletion cloud_module/pipeline/global_ssm.tf
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ locals {
newssite_economy_tokens = split(",", data.aws_ssm_parameter.newssite_economy.value)
newssite_economy_alias = local.newssite_economy_tokens[2]

_media_table_tokens = split(",", data.aws_ssm_parameter.media_table)
_media_table_tokens = split(",", data.aws_ssm_parameter.media_table.value)
media_table_arn = local._media_table_tokens[0]
media_table_id = local._media_table_tokens[1]
}
10 changes: 9 additions & 1 deletion cloud_module/pipeline/lambda.tf
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ module "step_function" {
module "scraper_lambda" {
source = "terraform-aws-modules/lambda/aws"
create_function = true
function_name = "${local.project_name}-scraper-lambda"
function_name = "${local.project_name}-landing-lambda"
description = "Lambda function for scraping"
handler = "landing"
runtime = "go1.x"
Expand All @@ -82,6 +82,13 @@ module "scraper_lambda" {

attach_policy_statements = true
policy_statements = {
allow_db_query = {
effect = "Allow",
actions = [
"dynamodb:PutItem"
],
resources = [local.media_table_arn]
}
s3_archive_bucket = {
effect = "Allow",
actions = [
Expand All @@ -98,6 +105,7 @@ module "scraper_lambda" {
S3_ARCHIVE_BUCKET = data.aws_s3_bucket.archive.id

NEWSSITE_ECONOMY = data.aws_ssm_parameter.newssite_economy.value
DYNAMODB_TABLE_ID = local.media_table_id
}

tags = {
Expand Down
70 changes: 12 additions & 58 deletions cloud_module/pipeline/s3_triggers.tf
Original file line number Diff line number Diff line change
@@ -1,13 +1,6 @@
resource "aws_s3_bucket_notification" "bucket_notification" {
bucket = data.aws_s3_bucket.archive.id

lambda_function {
lambda_function_arn = module.landing_s3_trigger_lambda.lambda_function_arn
events = ["s3:ObjectCreated:*"]
filter_prefix = "${local.newssite_economy_alias}/"
filter_suffix = "landing.html"
}

lambda_function {
lambda_function_arn = module.landing_metadata_s3_trigger_lambda.lambda_function_arn
events = ["s3:ObjectCreated:*"]
Expand All @@ -16,19 +9,10 @@ resource "aws_s3_bucket_notification" "bucket_notification" {
}

depends_on = [
aws_lambda_permission.allow_bucket_trigger_by_landing,
aws_lambda_permission.allow_bucket_trigger_by_landing_metadata
]
}

# Lets the S3 service invoke the landing S3-trigger lambda, restricted to
# invocations whose source is the archive bucket.
resource "aws_lambda_permission" "allow_bucket_trigger_by_landing" {
  statement_id = "AllowExecutionFromS3Bucket"

  # Who may call, and on behalf of which bucket.
  principal  = "s3.amazonaws.com"
  source_arn = data.aws_s3_bucket.archive.arn

  # What they may do, and to which function.
  action        = "lambda:InvokeFunction"
  function_name = module.landing_s3_trigger_lambda.lambda_function_arn
}

resource "aws_lambda_permission" "allow_bucket_trigger_by_landing_metadata" {
statement_id = "AllowExecutionFromS3Bucket"
action = "lambda:InvokeFunction"
Expand All @@ -37,52 +21,11 @@ resource "aws_lambda_permission" "allow_bucket_trigger_by_landing_metadata" {
source_arn = data.aws_s3_bucket.archive.arn
}

# Lambda built from ./cmd/landing_s3_trigger; per its description it puts a
# landing page into the db. It is granted dynamodb:PutItem on the media table
# and receives the table id through DYNAMODB_TABLE_ID.
module "landing_s3_trigger_lambda" {
  source          = "terraform-aws-modules/lambda/aws"
  create_function = true
  function_name   = "${local.project_name}-landing-s3-trigger-lambda"
  description     = "Put a landing page in db"
  handler         = "landing_s3_trigger"
  runtime         = "go1.x"

  # Build the Go binary inside the repo's lambda_golang directory, then zip
  # only the resulting executable.
  source_path = [{
    path     = "${var.repo_dir}/lambda_golang/"
    commands = ["${local.go_build_flags} go build ./cmd/landing_s3_trigger", ":zip"]
    patterns = ["landing_s3_trigger"]
  }]

  timeout                           = 900
  cloudwatch_logs_retention_in_days = 7
  publish                           = true

  attach_policy_statements = true
  policy_statements = {
    allow_db_put = {
      effect = "Allow",
      actions = [
        "dynamodb:PutItem",
      ],
      # Local values must be referenced with the `local.` prefix; a bare
      # identifier is not a valid Terraform reference.
      resources = [local.media_table_arn]
    }
  }

  environment_variables = {
    SLACK_WEBHOOK_URL = var.slack_post_webhook_url
    LOG_LEVEL         = "DEBUG"
    DEBUG             = "true"
    # Same `local.` prefix requirement as the policy resource above.
    DYNAMODB_TABLE_ID = local.media_table_id
  }

  tags = {
    Project = local.project_name
  }
}

module "landing_metadata_s3_trigger_lambda" {
source = "terraform-aws-modules/lambda/aws"

create_function = true
function_name = "${local.project_name}-fetch-stories"
function_name = "${local.project_name}-stories-lambda"
description = "Fetch ${local.project_name} stories; triggered by metadata.json creation"
handler = "stories"
runtime = "go1.x"
Expand Down Expand Up @@ -117,6 +60,15 @@ EOF

attach_policy_statements = true
policy_statements = {
allow_db_put = {
effect = "Allow",
actions = [
"dynamodb:UpdateItem",
],
resources = [
local.media_table_arn,
]
}
s3_archive_bucket = {
effect = "Allow",
actions = [
Expand All @@ -141,8 +93,10 @@ EOF
SLACK_WEBHOOK_URL = var.slack_post_webhook_url
LOGLEVEL = "DEBUG"
ENV = local.environment
DEBUG = "true"

S3_ARCHIVE_BUCKET = data.aws_s3_bucket.archive.id
DYNAMODB_TABLE_ID = local.media_table_id
SFN_ARN = module.batch_stories_sfn.state_machine_arn
}

Expand Down
10 changes: 7 additions & 3 deletions cloud_module/pipeline/scheduler.tf
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ resource "aws_cloudwatch_event_target" "landing_metadata_scheduler_event_target"
module landing_metadata_cronjob_lambda {
source = "terraform-aws-modules/lambda/aws"
create_function = true
function_name = "${local.project_name}-batch-stories-fetch-parse"
function_name = "${local.project_name}-landing-metadata-cronjob-lambda"
description = "Query landing pages in db; compute & archive their metadata"
handler = "landing_metadata_cronjob"
runtime = "go1.x"
Expand All @@ -107,11 +107,15 @@ module landing_metadata_cronjob_lambda {
"dynamodb:Query",
"dynamodb:UpdateItem",
],
resources = [media_table_arn]
resources = [
local.media_table_arn,
"${local.media_table_arn}/index/metadataIndex"
]
}
s3_archive_bucket = {
effect = "Allow",
actions = [
"s3:GetObject",
"s3:PutObject",
],
resources = [
Expand All @@ -136,7 +140,7 @@ module landing_metadata_cronjob_lambda {
LOG_LEVEL = "DEBUG"
DEBUG = "true"
S3_ARCHIVE_BUCKET = data.aws_s3_bucket.archive.id
DYNAMODB_TABLE_ID = media_table_id
DYNAMODB_TABLE_ID = local.media_table_id
}

tags = {
Expand Down
4 changes: 2 additions & 2 deletions cloud_module/pipeline/sfn_def/batch_stories_def.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
"Parameters": {
"story.$": "$$.Map.Item.Value",
"newsSiteAlias.$": "$.newsSiteAlias",
"landingPageUuid.$": "$.landingPageUuid",
"landingPageTimeStamp.$": "$.landingPageTimeStamp"
},
"Iterator": {
Expand All @@ -32,8 +33,7 @@
}
}
},
"Next": "Stories-Finalizer",
"End": false
"Next": "Stories-Finalizer"
},
"Stories-Finalizer": {
"Type":"Task",
Expand Down
23 changes: 19 additions & 4 deletions cloud_module/pipeline/stories_sfn.tf
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ module batch_stories_sfn {
service_integrations = {
lambda = {
lambda = [
module.fetch_story_lambda.lambda_function_arn
module.fetch_story_lambda.lambda_function_arn,
module.stories_finalizer_lambda.lambda_function_arn
]
}
Expand All @@ -31,7 +31,7 @@ module batch_stories_sfn {
module fetch_story_lambda {
source = "terraform-aws-modules/lambda/aws"
create_function = true
function_name = "${local.project_name}-fetch-story"
function_name = "${local.project_name}-story-lambda"
description = "Fetch and archive a story page"
handler = "story"
runtime = "go1.x"
Expand All @@ -49,6 +49,15 @@ module fetch_story_lambda {

attach_policy_statements = true
policy_statements = {
allow_db_put = {
effect = "Allow",
actions = [
"dynamodb:PutItem",
],
resources = [
local.media_table_arn,
]
}
s3_archive_bucket = {
effect = "Allow",
actions = [
Expand Down Expand Up @@ -77,6 +86,8 @@ module fetch_story_lambda {
LOG_LEVEL = "DEBUG"
DEBUG = "true"
S3_ARCHIVE_BUCKET = data.aws_s3_bucket.archive.id

DYNAMODB_TABLE_ID = local.media_table_id
}

tags = {
Expand Down Expand Up @@ -107,17 +118,21 @@ module "stories_finalizer_lambda" {
allow_db_put = {
effect = "Allow",
actions = [
"dynamodb:Query",
"dynamodb:UpdateItem",
],
resources = [media_table_arn]
resources = [
local.media_table_arn,
"${local.media_table_arn}/index/s3KeyIndex"
]
}
}

environment_variables = {
SLACK_WEBHOOK_URL = var.slack_post_webhook_url
LOG_LEVEL = "DEBUG"
DEBUG = "true"
DYNAMODB_TABLE_ID = media_table_id
DYNAMODB_TABLE_ID = local.media_table_id
}

tags = {
Expand Down
16 changes: 14 additions & 2 deletions lambda_golang/cmd/landing/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"github.com/rivernews/GoTools"

"github.com/rivernews/media-literacy/pkg/cloud"
"github.com/rivernews/media-literacy/pkg/common"
"github.com/rivernews/media-literacy/pkg/newssite"
)

Expand All @@ -29,7 +30,7 @@ type LambdaResponse struct {
func HandleRequest(ctx context.Context, name LambdaEvent) (LambdaResponse, error) {
newsSite := newssite.GetNewsSite("NEWSSITE_ECONOMY")

bodyText := newssite.Fetch(newsSite.LandingURL)
bodyText := common.Fetch(newsSite.LandingURL)
GoTools.Logger("INFO", "In golang runtime now!\n\n```\n "+bodyText[:500]+"\n ...```\n End of message")

// scraper
Expand All @@ -56,10 +57,21 @@ func HandleRequest(ctx context.Context, name LambdaEvent) (LambdaResponse, error
GoTools.Logger("INFO", successMessage)

// S3 archive
landingPageS3Key := fmt.Sprintf("%s/daily-headlines/%s/landing.html", newsSite.Alias, common.Now())
cloud.Archive(cloud.ArchiveArgs{
BodyText: bodyText,
Key: fmt.Sprintf("%s/daily-headlines/%s/landing.html", newsSite.Alias, newssite.Now()),
Key: landingPageS3Key,
})
out := cloud.DynamoDBPutItem(ctx, newssite.MediaTableItem{
S3Key: landingPageS3Key,
DocType: newssite.DOCTYPE_LANDING,
Events: []newssite.MediaTableItemEvent{
newssite.GetEventLandingPageFetched(newsSite.Alias, landingPageS3Key),
newssite.GetEventLandingMetadataRequested(landingPageS3Key),
},
IsDocTypeWaitingForMetadata: newssite.DOCTYPE_LANDING,
})
GoTools.Logger("DEBUG", fmt.Sprintf("```%s```\n", GoTools.AsJson(out)))

return LambdaResponse{
OK: true,
Expand Down
Loading

0 comments on commit 4ec0693

Please sign in to comment.