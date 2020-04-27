Hackernoon supports freeCodeCamp.org
My name is Uria Franko and I’m a freelance developer.
# Install the Serverless Framework CLI globally.
npm install -g serverless
# Scaffold a new project from the aws-python3 template into ./my-assets-monitor.
serverless create --template aws-python3 --path my-assets-monitor
# Register AWS credentials for the aws provider; replace the x placeholders
# with a real access key and secret.
serverless config credentials --provider aws --key xxxxxxxxxxxxxx --secret xxxxxxxxxxxxxx
import json
import os
import sys
from concurrent.futures import ThreadPoolExecutor
from urllib.parse import urlparse
# Make the packages installed into ./vendored (pip install -t vendored)
# importable before the third-party imports that follow.
here = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(here, "./vendored"))
import requests
from bs4 import BeautifulSoup
from mailer import Mailer
# Shared mailer used by every function in this module.
# The recipient is declared as DESTINATION_EMAIL in serverless.yml; the old
# DESTINATION_URL name is still honoured as a fallback. When neither is set
# (or both are empty) None is passed and Mailer sends to the source address.
mailer = Mailer(
    os.environ['TARGET_URL'],
    os.environ['SOURCE_EMAIL'],
    os.environ.get('DESTINATION_EMAIL') or os.environ.get('DESTINATION_URL') or None,
)
# URLs on the monitored domain that have already been collected.
internal_urls = set()
# URLs that point at other domains; recorded here and not returned for checking.
external_urls = set()
def multi_threading(func, args, workers):
    """Apply ``func`` to every item of ``args`` on a thread pool.

    Args:
        func: Callable taking one item of ``args``.
        args: Iterable of inputs.
        workers: Maximum number of worker threads.

    Returns:
        A list with the results of ``func``, in the same order as ``args``.

    Raises:
        Whatever ``func`` raises; the first failure surfaces when the results
        are collected.
    """
    with ThreadPoolExecutor(max_workers=workers) as ex:
        # Collect inside the ``with`` so all results are materialised before
        # the pool is shut down.
        return list(ex.map(func, args))
def is_valid(url):
    """Return True when ``url`` is absolute, i.e. has both a scheme and a host.

    Relative paths, bare host names and scheme-only links such as
    ``mailto:`` are rejected because they carry no network location.
    """
    parsed = urlparse(url)
    return bool(parsed.netloc) and bool(parsed.scheme)
def check_status(url):
    """Fetch ``url`` and record it on the shared mailer if it is broken.

    An asset counts as broken when the request fails outright (timeout,
    connection error, invalid URL, ...) or when the server answers with a
    4xx/5xx status code. Nothing is returned; failures are appended to
    ``mailer.assets``.
    """
    try:
        # Same 5 second budget that request_url uses, so one stalled asset
        # cannot hold a worker thread indefinitely.
        resp = requests.get(url, timeout=5)
    except requests.exceptions.RequestException:
        # An asset that cannot be fetched is reported as a failure. Letting
        # the exception escape would instead abort the whole thread-pool run
        # when the results are collected.
        mailer.assets.append(url)
        return
    if resp.status_code >= 400:
        mailer.assets.append(url)
def request_url(url):
    """Download ``url`` and parse it as HTML.

    Returns:
        A ``BeautifulSoup`` document on success, or ``False`` when the page
        could not be fetched. On failure an error e-mail describing the
        problem is sent through the shared mailer before returning.
    """
    # Local import keeps this block self-contained.
    from html import escape

    try:
        response = requests.get(url, timeout=5)
        response.raise_for_status()
    except requests.exceptions.Timeout:
        # Timeout covers both connect and read timeouts; catching only
        # ConnectTimeout would let a slow response crash the handler.
        mailer.send_errors(['Connection timed out to your target'])
        return False
    except requests.exceptions.HTTPError:
        # raise_for_status() raised, so ``response`` is bound here.
        mailer.send_errors(
            [f'Your target raised <strong>{response.status_code}</strong> status code']
        )
        return False
    except requests.exceptions.RequestException as err:
        # Connection errors and every other requests failure (bad URL,
        # too many redirects, ...). The message is inserted into an HTML
        # body, so escape it in case it contains characters such as '<'.
        mailer.send_errors([escape(str(err))])
        return False
    return BeautifulSoup(response.content, "html.parser")
def get_all_website_links(url):
    """Collect the asset and link URLs referenced by the page at ``url``.

    Looks at ``href`` of ``<a>``/``<link>`` and ``src`` of ``<img>``/
    ``<script>`` tags. References are resolved against ``url`` and split
    into internal ones (same host as ``url``, or a subdomain of it) and
    external ones.

    Returns:
        A set of internal URLs that were not already present in the
        module-level ``internal_urls``, or ``False`` when the page itself
        could not be fetched.

    Side effects:
        Adds to the module-level ``internal_urls`` and ``external_urls``.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import urljoin

    domain_name = urlparse(url).netloc.lower()
    soup = request_url(url)
    if soup is False:
        return False

    urls = set()
    for tag in soup.find_all(["a", "link", "img", "script"]):
        attr = 'href' if tag.name in ("a", "link") else 'src'
        raw = (tag.attrs.get(attr) or "").strip()
        # References containing '#' (in-page anchors) are skipped, as before.
        if not raw or '#' in raw:
            continue
        # urljoin resolves relative paths against the page URL and leaves
        # scheme-only references (mailto:, tel:, data:, javascript:) intact,
        # so they fail is_valid instead of becoming bogus site URLs.
        href = urljoin(url, raw)
        if not is_valid(href):
            continue
        if href in internal_urls:
            continue
        # Compare hosts rather than searching the whole URL, so an external
        # link that merely mentions the domain is not treated as internal.
        host = urlparse(href).netloc.lower()
        if host != domain_name and not host.endswith("." + domain_name):
            external_urls.add(href)
            continue
        urls.add(href)
        internal_urls.add(href)
    return urls
def main(event, context):
    """Lambda entry point: crawl TARGET_URL and report broken assets.

    Args:
        event: Invocation payload (unused).
        context: Lambda context object (unused).

    Returns:
        ``{"statusCode": 200}`` after a completed check, or a 500 response
        dict when the target page itself could not be fetched.
    """
    # Lambda may reuse this module between invocations. Reset the shared
    # state so a warm run neither skips every already-seen URL nor re-reports
    # failures from an earlier run.
    internal_urls.clear()
    external_urls.clear()
    mailer.assets.clear()

    crawled_links = get_all_website_links(os.environ['TARGET_URL'])
    if crawled_links is False:
        return {
            "statusCode": 500,
            "body": "Error raised trying to get the target"
        }

    multi_threading(check_status, crawled_links, 20)
    # Only send the failure report when something actually failed.
    if mailer.assets:
        mailer.send_mail()
    return {
        "statusCode": 200,
    }
import boto3
import os
class Mailer:
    """Builds and sends the monitor's HTML report e-mails through Amazon SES.

    Attributes are plain instance fields: ``base_url`` (the monitored site),
    ``source_email`` (sender and reply-to), ``target_email`` (recipient) and
    ``assets`` (list of failing asset URLs, filled by the caller).
    """

    def __init__(self, base_url, source_email, target_email=None,
                 region_name='us-east-1'):
        """Create the SES client and store the addressing details.

        Args:
            base_url: URL of the monitored website, shown in every e-mail.
            source_email: Sender address; also used as the reply-to address.
            target_email: Recipient address. When missing or empty the
                e-mail is sent to ``source_email``.
            region_name: AWS region of the SES endpoint.

        Raises:
            KeyError: If AWS_KEY or AWS_SECRET is not set in the environment.
        """
        aws_access_key_id = os.environ['AWS_KEY']
        aws_secret_access_key = os.environ['AWS_SECRET']
        self.client = boto3.client(
            'ses',
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
            region_name=region_name,
        )
        self.base_url = base_url
        self.source_email = source_email
        # An empty recipient would make every send fail, so treat it like None.
        self.target_email = target_email or source_email
        self.assets = []

    def send_mail(self):
        """Send the asset-failure report listing every URL in ``self.assets``."""
        # Local import keeps this block self-contained.
        from html import escape

        subject = "Assets Monitor Asset Failure"
        # URLs come from crawled pages, so escape them and quote the
        # attribute values before placing them in the HTML body.
        base = escape(self.base_url)
        parts = [
            "\n<h2>There's a problem with one of your assets!</h2>\n",
            f'<h4>Base URL: <a href="{base}">{base}</a></h4>\n',
        ]
        for link in self.assets:
            safe = escape(link)
            parts.append(f'<a href="{safe}"><p>{safe}</p></a><br>')
        self.send(subject, "".join(parts))

    def send_errors(self, errors):
        """Send a website-failure e-mail with one paragraph per error.

        Args:
            errors: Iterable of messages. They are inserted as HTML without
                escaping, because callers pass markup such as ``<strong>``.
        """
        # Local import keeps this block self-contained.
        from html import escape

        subject = "Assets Monitor Website Failure"
        parts = [
            "\n<h3>There's a problem with your monitored website:</h3>\n",
            f"<h4>{escape(self.base_url)}</h4>\n",
        ]
        parts.extend(f"<p>{err}</p><br>" for err in errors)
        self.send(subject, "".join(parts))

    def send(self, subject, body):
        """Send one HTML e-mail from ``source_email`` to ``target_email``.

        Args:
            subject: Subject line.
            body: HTML body.
        """
        self.client.send_email(
            Source=self.source_email,
            Destination={
                'ToAddresses': [
                    self.target_email,
                ],
            },
            Message={
                'Subject': {
                    'Data': subject,
                    'Charset': 'UTF-8'
                },
                'Body': {
                    'Html': {
                        'Data': body,
                        'Charset': 'UTF-8'
                    }
                }
            },
            ReplyToAddresses=[
                self.source_email,
            ],
        )
requests
# "bs4" on PyPI is only a placeholder that pulls in beautifulsoup4;
# depend on the real distribution directly.
beautifulsoup4
# Install the requirements into ./vendored, the directory the handler adds to sys.path.
pip install -r requirements.txt -t vendored
# Serverless Framework definition for the asset monitor.
service: my-asset-monitor

provider:
  name: aws
  # python3.7 has been retired by AWS Lambda; use a currently supported runtime.
  runtime: python3.12
  stage: dev
  region: us-east-1
  # Read by the handler and mailer modules through os.environ;
  # fill these in before deploying.
  environment:
    AWS_KEY: ""
    AWS_SECRET: ""
    TARGET_URL: ""
    SOURCE_EMAIL: ""
    DESTINATION_EMAIL: ""

functions:
  post:
    handler: handler.main
    events:
      # Run the check on a fixed schedule.
      - schedule:
          name: asset-checker-schedule
          description: 'Schedule asset checking every 30 minutes'
          rate: rate(30 minutes)
# Deploy the service defined in serverless.yml.
serverless deploy