# Template format version. Quoted so YAML loaders keep it as a literal string
# instead of resolving it to a date value.
AWSTemplateFormatVersion: '2010-09-09'
Resources:
GlueDatabase01:
  # Glue Data Catalog database; the table resources in this template attach
  # to it through `DatabaseName: !Ref GlueDatabase01`.
  Type: AWS::Glue::Database
  Properties:
    # The catalog is the deploying account's own catalog.
    CatalogId: !Ref AWS::AccountId
    DatabaseInput:
      Name: sample_glue_database
# reference: https://docs.aws.amazon.com/ja_jp/athena/latest/ug/create-cloudtrail-table-partition-projection.html
CloudTrailTable01:
  # Glue table over CloudTrail logs stored in the audit bucket.
  # Partitions (account / region / timestamp) are resolved with Athena
  # partition projection using the `projection.*` parameters below.
  Type: AWS::Glue::Table
  Properties:
    DatabaseName: !Ref GlueDatabase01
    CatalogId: !Ref AWS::AccountId
    TableInput:
      TableType: EXTERNAL_TABLE
      Name: cloud_trail
      PartitionKeys:
        - Name: account
          Type: string
        - Name: region
          Type: string
        - Name: timestamp
          Type: string
      Parameters:
        # Table parameters are string-valued; boolean/number-looking values
        # are quoted so they are passed through as literal strings.
        projection.enabled: 'true'
        projection.account.type: integer
        # The projection property is `digits` (plural). It fixes the width of
        # the generated value so account IDs keep their leading zeros.
        projection.account.digits: '12'
        # NOTE(review): this range spans every possible 12-digit ID; queries
        # presumably need a filter on `account` to stay within Athena's
        # projected-partition limits -- confirm.
        projection.account.range: 000000000000,999999999999
        projection.timestamp.type: date
        projection.timestamp.range: NOW-7YEARS,NOW
        projection.timestamp.format: yyyy/MM/dd
        projection.timestamp.interval: '1'
        projection.timestamp.interval.unit: DAYS
        projection.region.type: enum
        projection.region.values: us-east-1,us-east-2,us-west-1,us-west-2,af-south-1,ap-east-1,ap-south-1,ap-northeast-2,ap-southeast-1,ap-southeast-2,ap-northeast-1,ca-central-1,eu-central-1,eu-west-1,eu-west-2,eu-south-1,eu-west-3,eu-north-1,me-south-1,sa-east-1
        EXTERNAL: 'TRUE'
        # `${!name}` is the !Sub escape: it emits a literal `${name}`
        # placeholder that partition projection fills in at query time.
        storage.location.template: !Sub s3://sample-audit-bucket-${AWS::AccountId}-${AWS::Region}/AWSLogs/${!account}/CloudTrail/${!region}/${!timestamp}
      StorageDescriptor:
        Location: !Sub s3://sample-audit-bucket-${AWS::AccountId}-${AWS::Region}/AWSLogs/
        InputFormat: com.amazon.emr.cloudtrail.CloudTrailInputFormat
        OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
        Compressed: false
        SerdeInfo:
          SerializationLibrary: com.amazon.emr.hive.serde.CloudTrailSerde
        StoredAsSubDirectories: false
        Columns:
          - Name: eventversion
            Type: string
          - Name: useridentity
            Type: struct<type:string,principalId:string,arn:string,accountId:string,invokedBy:string,accessKeyId:string,userName:string,sessionContext:struct<attributes:struct<mfaAuthenticated:string,creationDate:string>,sessionIssuer:struct<type:string,principalId:string,arn:string,accountId:string,userName:string>>>
          - Name: eventtime
            Type: string
          - Name: eventsource
            Type: string
          - Name: eventname
            Type: string
          - Name: awsregion
            Type: string
          - Name: sourceipaddress
            Type: string
          - Name: useragent
            Type: string
          - Name: errorcode
            Type: string
          - Name: errormessage
            Type: string
          - Name: requestparameters
            Type: string
          - Name: responseelements
            Type: string
          - Name: additionaleventdata
            Type: string
          - Name: requestid
            Type: string
          - Name: eventid
            Type: string
          - Name: resources
            Type: array<struct<arn:string,accountId:string,type:string>>
          - Name: eventtype
            Type: string
          - Name: apiversion
            Type: string
          - Name: readonly
            Type: string
          - Name: recipientaccountid
            Type: string
          - Name: serviceeventdetails
            Type: string
          - Name: sharedeventid
            Type: string
          - Name: vpcendpointid
            Type: string
# reference: https://aws.amazon.com/jp/blogs/mt/how-to-query-your-aws-resource-configuration-states-using-aws-config-and-amazon-athena/
ConfigTable01:
  # Glue table over AWS Config JSON deliveries stored in the audit bucket.
  # Partitions (account / region / timestamp) are resolved with Athena
  # partition projection using the `projection.*` parameters below.
  Type: AWS::Glue::Table
  Properties:
    CatalogId: !Ref AWS::AccountId
    DatabaseName: !Ref GlueDatabase01
    TableInput:
      Name: config
      TableType: EXTERNAL_TABLE
      PartitionKeys:
        - Name: account
          Type: string
        - Name: region
          Type: string
        - Name: timestamp
          Type: string
      Parameters:
        # Table parameters are string-valued; boolean/number-looking values
        # are quoted so they are passed through as literal strings.
        projection.enabled: 'true'
        projection.account.type: integer
        # The projection property is `digits` (plural). It fixes the width of
        # the generated value so account IDs keep their leading zeros.
        projection.account.digits: '12'
        projection.account.range: 000000000000,999999999999
        projection.timestamp.type: date
        projection.timestamp.range: NOW-7YEARS,NOW
        # AWS Config writes month and day without zero padding
        # (e.g. .../2023/9/5/...), so the pattern uses single-letter M and d.
        # NOTE(review): confirm against the actual object keys in the bucket.
        projection.timestamp.format: yyyy/M/d
        projection.timestamp.interval: '1'
        projection.timestamp.interval.unit: DAYS
        projection.region.type: enum
        projection.region.values: us-east-1,us-east-2,us-west-1,us-west-2,af-south-1,ap-east-1,ap-south-1,ap-northeast-2,ap-southeast-1,ap-southeast-2,ap-northeast-1,ca-central-1,eu-central-1,eu-west-1,eu-west-2,eu-south-1,eu-west-3,eu-north-1,me-south-1,sa-east-1
        # `${!name}` is the !Sub escape: it emits a literal `${name}`
        # placeholder that partition projection fills in at query time.
        storage.location.template: !Sub s3://sample-audit-bucket-${AWS::AccountId}-${AWS::Region}/AWSLogs/${!account}/Config/${!region}/${!timestamp}
      StorageDescriptor:
        Location: !Sub s3://sample-audit-bucket-${AWS::AccountId}-${AWS::Region}/AWSLogs/
        # Plain text input to pair with the JSON SerDe below; the
        # CloudTrail-specific input format does not apply to Config files.
        InputFormat: org.apache.hadoop.mapred.TextInputFormat
        OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
        SerdeInfo:
          SerializationLibrary: org.apache.hive.hcatalog.data.JsonSerDe
        Columns:
          - Name: fileversion
            Type: string
          # Lowercase like the sibling columns.
          - Name: configsnapshotid
            Type: string
          - Name: configurationitems
            Type: array<struct<configurationItemVersion:string,configurationItemCaptureTime:string,configurationStateId:bigint,awsAccountId:string,configurationItemStatus:string,resourceType:string,resourceId:string,resourceName:string,ARN:string,awsRegion:string,availabilityZone:string,configurationStateMd5Hash:string,resourceCreationTime:string>>
# reference: https://docs.aws.amazon.com/ja_jp/athena/latest/ug/querying-guardduty.html
GuardDutyTable01:
  # Glue table over GuardDuty findings (JSON) stored in the audit bucket.
  # Partitions (account / region / timestamp) are resolved with Athena
  # partition projection using the `projection.*` parameters below.
  Type: AWS::Glue::Table
  Properties:
    DatabaseName: !Ref GlueDatabase01
    CatalogId: !Ref AWS::AccountId
    TableInput:
      TableType: EXTERNAL_TABLE
      Name: guard_duty
      PartitionKeys:
        - Name: account
          Type: string
        - Name: region
          Type: string
        - Name: timestamp
          Type: string
      Parameters:
        # Table parameters are string-valued; boolean/number-looking values
        # are quoted so they are passed through as literal strings.
        projection.enabled: 'true'
        projection.account.type: integer
        # The projection property is `digits` (plural). It fixes the width of
        # the generated value so account IDs keep their leading zeros.
        projection.account.digits: '12'
        projection.account.range: 000000000000,999999999999
        projection.timestamp.type: date
        projection.timestamp.range: NOW-1YEARS,NOW
        projection.timestamp.format: yyyy/MM/dd
        projection.timestamp.interval: '1'
        projection.timestamp.interval.unit: DAYS
        projection.region.type: enum
        projection.region.values: us-east-1,us-east-2,us-west-1,us-west-2,af-south-1,ap-east-1,ap-south-1,ap-northeast-2,ap-southeast-1,ap-southeast-2,ap-northeast-1,ca-central-1,eu-central-1,eu-west-1,eu-west-2,eu-south-1,eu-west-3,eu-north-1,me-south-1,sa-east-1
        EXTERNAL: 'TRUE'
        # `${!name}` is the !Sub escape: it emits a literal `${name}`
        # placeholder that partition projection fills in at query time.
        storage.location.template: !Sub s3://sample-audit-bucket-${AWS::AccountId}-${AWS::Region}/AWSLogs/${!account}/GuardDuty/${!region}/${!timestamp}
      StorageDescriptor:
        Location: !Sub s3://sample-audit-bucket-${AWS::AccountId}-${AWS::Region}/AWSLogs/
        InputFormat: org.apache.hadoop.mapred.TextInputFormat
        OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
        Compressed: false
        SerdeInfo:
          SerializationLibrary: org.openx.data.jsonserde.JsonSerDe
        StoredAsSubDirectories: false
        Columns:
          - Name: schemaversion
            Type: string
          - Name: accountid
            Type: string
          # No `region` data column: a data column may not share its name
          # with a partition key, and `region` is already a partition key
          # above, so `region` stays queryable through the partition.
          - Name: partition
            Type: string
          - Name: id
            Type: string
          - Name: arn
            Type: string
          - Name: type
            Type: string
          - Name: resource
            Type: struct<resourcetype:string,accesskeydetails:struct<accesskeyid:string,principalid:string,usertype:string,username:string>>
          - Name: service
            Type: struct<servicename:string,detectorid:string,action:struct<actiontype:string,awsapicallaction:struct<api:string,servicename:string,callertype:string,remoteipdetails:struct<ipaddressv4:string,organization:struct<asn:string,asnorg:string,isp:string,org:string>,country:struct<countryname:string>,city:struct<cityname:string>,geolocation:struct<lat:float,lon:float>>,affectedresources:string>>,resourcerole:string,additionalinfo:struct<recentapicalls:array<struct<api:string,count:int>>>,evidence:string,eventfirstseen:string,eventlastseen:string,archived:boolean,count:int>
          - Name: severity
            Type: int
          - Name: createdat
            Type: string
          - Name: updatedat
            Type: string
          - Name: title
            Type: string
          - Name: description
            Type: string
# reference: https://docs.aws.amazon.com/ja_jp/athena/latest/ug/vpc-flow-logs-create-table-statement.html
VPCFlowLogsTable01:
  # Glue table over space-delimited VPC Flow Logs stored in the audit bucket.
  # Partitions (account / region / timestamp) are resolved with Athena
  # partition projection using the `projection.*` parameters below.
  Type: AWS::Glue::Table
  Properties:
    DatabaseName: !Ref GlueDatabase01
    CatalogId: !Ref AWS::AccountId
    TableInput:
      TableType: EXTERNAL_TABLE
      Name: vpc_flow_logs
      PartitionKeys:
        - Name: account
          Type: string
        - Name: region
          Type: string
        - Name: timestamp
          Type: string
      Parameters:
        # Table parameters are string-valued; boolean/number-looking values
        # are quoted so they are passed through as literal strings.
        projection.enabled: 'true'
        projection.account.type: integer
        # The projection property is `digits` (plural). It fixes the width of
        # the generated value so account IDs keep their leading zeros.
        projection.account.digits: '12'
        projection.account.range: 000000000000,999999999999
        projection.timestamp.type: date
        projection.timestamp.range: NOW-7YEARS,NOW
        projection.timestamp.format: yyyy/MM/dd
        projection.timestamp.interval: '1'
        projection.timestamp.interval.unit: DAYS
        projection.region.type: enum
        projection.region.values: us-east-1,us-east-2,us-west-1,us-west-2,af-south-1,ap-east-1,ap-south-1,ap-northeast-2,ap-southeast-1,ap-southeast-2,ap-northeast-1,ca-central-1,eu-central-1,eu-west-1,eu-west-2,eu-south-1,eu-west-3,eu-north-1,me-south-1,sa-east-1
        EXTERNAL: 'TRUE'
        # Header skipping is documented as a table property (TBLPROPERTIES),
        # so it is declared here in addition to the SerDe / storage
        # descriptor parameters further down.
        skip.header.line.count: '1'
        # `${!name}` is the !Sub escape: it emits a literal `${name}`
        # placeholder that partition projection fills in at query time.
        storage.location.template: !Sub s3://sample-audit-bucket-${AWS::AccountId}-${AWS::Region}/AWSLogs/${!account}/vpcflowlogs/${!region}/${!timestamp}
      StorageDescriptor:
        Location: !Sub s3://sample-audit-bucket-${AWS::AccountId}-${AWS::Region}/AWSLogs/
        InputFormat: org.apache.hadoop.mapred.TextInputFormat
        OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
        SerdeInfo:
          SerializationLibrary: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          Parameters:
            # Fields are separated by a single space.
            field.delim: ' '
            skip.header.line.count: '1'
        Parameters:
          classification: textfile
          skip.header.line.count: '1'
        # Columns are matched to log fields by position, so this list has to
        # follow the field order of the flow log format in use.
        # NOTE(review): the reference DDL also lists `log_status` (after
        # `action`) and `region` (after `pkt_dstaddr`); both are absent here.
        # Confirm that the flow logs use a custom format without those fields.
        Columns:
          - Name: version
            Type: int
          - Name: account_id
            Type: string
          - Name: interface_id
            Type: string
          - Name: srcaddr
            Type: string
          - Name: dstaddr
            Type: string
          - Name: srcport
            Type: int
          - Name: dstport
            Type: int
          - Name: protocol
            Type: bigint
          - Name: packets
            Type: bigint
          - Name: bytes
            Type: bigint
          - Name: start
            Type: bigint
          - Name: end
            Type: bigint
          - Name: action
            Type: string
          - Name: vpc_id
            Type: string
          - Name: subnet_id
            Type: string
          - Name: instance_id
            Type: string
          - Name: tcp_flags
            Type: int
          - Name: type
            Type: string
          - Name: pkt_srcaddr
            Type: string
          - Name: pkt_dstaddr
            Type: string
          - Name: az_id
            Type: string
          - Name: sublocation_type
            Type: string
          - Name: sublocation_id
            Type: string
          - Name: pkt_src_aws_service
            Type: string
          - Name: pkt_dst_aws_service
            Type: string
          - Name: flow_direction
            Type: string
          - Name: traffic_path
            Type: int
# reference: https://repost.aws/knowledge-center/analyze-logs-athena
ServerAccessLogTable01:
  # Glue table over S3 server access logs, parsed line by line with a regex.
  # Partitions (account / region / timestamp / source_bucket_name) are
  # resolved with Athena partition projection using the `projection.*`
  # parameters below.
  Type: AWS::Glue::Table
  Properties:
    DatabaseName: !Ref GlueDatabase01
    CatalogId: !Ref AWS::AccountId
    TableInput:
      Name: server_access_log
      TableType: EXTERNAL_TABLE
      PartitionKeys:
        - Name: account
          Type: string
        - Name: region
          Type: string
        - Name: timestamp
          Type: string
        - Name: source_bucket_name
          Type: string
      Parameters:
        # Table parameters are string-valued; boolean/number-looking values
        # are quoted so they are passed through as literal strings.
        projection.enabled: 'true'
        projection.account.type: integer
        # The projection property is `digits` (plural). It fixes the width of
        # the generated value so account IDs keep their leading zeros.
        projection.account.digits: '12'
        projection.account.range: 000000000000,999999999999
        projection.timestamp.format: yyyy/MM/dd
        projection.timestamp.interval: '1'
        projection.timestamp.interval.unit: DAYS
        projection.timestamp.range: NOW-7YEARS,NOW
        projection.timestamp.type: date
        projection.region.type: enum
        projection.region.values: us-east-1,us-east-2,us-west-1,us-west-2,af-south-1,ap-east-1,ap-south-1,ap-northeast-2,ap-southeast-1,ap-southeast-2,ap-northeast-1,ca-central-1,eu-central-1,eu-west-1,eu-west-2,eu-south-1,eu-west-3,eu-north-1,me-south-1,sa-east-1
        # `injected`: the bucket name is not enumerated here; the value is
        # taken from the query's filter on source_bucket_name.
        projection.source_bucket_name.type: injected
        # `${!name}` is the !Sub escape: it emits a literal `${name}`
        # placeholder that partition projection fills in at query time.
        storage.location.template: !Sub s3://sample-server-access-log-bucket-${AWS::AccountId}-${AWS::Region}/${!account}/${!region}/${!source_bucket_name}/${!timestamp}
      StorageDescriptor:
        Location: !Sub s3://sample-server-access-log-bucket-${AWS::AccountId}-${AWS::Region}/
        InputFormat: org.apache.hadoop.mapred.TextInputFormat
        OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
        SerdeInfo:
          SerializationLibrary: org.apache.hadoop.hive.serde2.RegexSerDe
          Parameters:
            # Single-quoted so the backslashes reach the SerDe unchanged.
            # The 26 capture groups line up one-to-one with the 26 columns
            # below; the last 8 sit in an optional group.
            input.regex: '([^ ]*) ([^ ]*) \[(.*?)\] ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) (\"[^\"]*\"|-) (-|[0-9]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) (\"[^\"]*\"|-) ([^ ]*)(?: ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*))?.*$'
        Columns:
          - Name: bucketowner
            Type: string
          - Name: bucket_name
            Type: string
          - Name: requestdatetime
            Type: string
          - Name: remoteip
            Type: string
          - Name: requester
            Type: string
          - Name: requestid
            Type: string
          - Name: operation
            Type: string
          - Name: key
            Type: string
          - Name: request_uri
            Type: string
          - Name: httpstatus
            Type: string
          - Name: errorcode
            Type: string
          - Name: bytessent
            Type: bigint
          - Name: objectsize
            Type: bigint
          - Name: totaltime
            Type: string
          - Name: turnaroundtime
            Type: string
          - Name: referrer
            Type: string
          - Name: useragent
            Type: string
          - Name: versionid
            Type: string
          - Name: hostid
            Type: string
          - Name: sigv
            Type: string
          - Name: ciphersuite
            Type: string
          - Name: authtype
            Type: string
          - Name: endpoint
            Type: string
          - Name: tlsversion
            Type: string
          - Name: accesspointarn
            Type: string
          - Name: aclrequired
            Type: string