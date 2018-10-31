Discover, triage, and prioritize Python errors in real-time
Visit Sentry https://sentry.io/promoted
Software Development Engineer at AWS Elemental
$ python -m pip install requests
$ python -m pip install requests --user
def fetch(session, csv):
    """Download one .csv file and return its contents as text.

    Args:
        session: Object with a ``get(url)`` method whose result works as a
            context manager and exposes ``text`` and ``status_code`` (the
            caller passes a ``requests.Session``).
        csv: File name appended to the base URL, e.g. ``"cities.csv"``.

    Returns:
        The response body as text. The body is returned even when the status
        code is not 200; in that case a ``FAILURE::<url>`` line is printed.
    """
    base_url = "https://people.sc.fsu.edu/~jburkardt/data/csv/"
    # Build the URL once so the failure message below can report it.
    url = base_url + csv
    with session.get(url) as response:
        data = response.text
        if response.status_code != 200:
            print("FAILURE::{0}".format(url))
        # Return .csv data for future consumption
        return data
The `fetch` function takes a `Session` object and the name of the .csv file desired, performs the web request, then returns the text inside the response.
# Clock used to measure how long the downloads take.
from timeit import default_timer
def get_data_synchronous():
    """Download every .csv file one after another and report the timing.

    A single ``requests.Session`` is used for all requests. A header row is
    printed first; after each download a row is printed with the file name
    and the seconds elapsed since the first request was started.
    """
    row = "{0:<30} {1:>20}"
    file_names = (
        "ford_escort.csv",
        "cities.csv",
        "hw_25000.csv",
        "mlb_teams_2012.csv",
        "nile.csv",
        "homes.csv",
        "hooke.csv",
        "lead_shot.csv",
        "news_decline.csv",
        "snakes_count_10000.csv",
        "trees.csv",
        "zillow.csv",
    )

    with requests.Session() as http:
        print(row.format("File", "Completed at"))

        # Configure the session here (headers, authentication, ...) before
        # the loop starts, so every `fetch` call shares the same settings.
        started = default_timer()
        for name in file_names:
            fetch(http, name)

            # Time is measured from the start of the whole run, not per file.
            stamp = "{:5.2f}s".format(default_timer() - started)
            print(row.format(name, stamp))
The `get_data_synchronous` function creates a `Session` object and then loops through each .csv file in the `csvs_to_fetch` list. Once the `fetch` operation is completed, the measured time is calculated and displayed in an easy-to-read format.
The `main` function will be simple (for now) and call our `get_data_synchronous` function:
def main():
    """Run the synchronous download of all .csv files."""
    # Simple for now
    get_data_synchronous()


# Only start the downloads when this file is executed as a script, so that
# importing it does not trigger network requests as a side effect.
if __name__ == "__main__":
    main()
import requests
from timeit import default_timer
def fetch(session, csv):
    """Request a single .csv file and hand back the response text.

    Args:
        session: Object providing ``get(url)``; the returned response is
            used as a context manager and must expose ``text`` and
            ``status_code`` (the caller passes a ``requests.Session``).
        csv: Name of the file to download, appended to the base URL.

    Returns:
        The text of the response. A non-200 status does not abort the call:
        ``FAILURE::<url>`` is printed and the text is still returned.
    """
    base_url = "https://people.sc.fsu.edu/~jburkardt/data/csv/"
    # Keep the full URL in a variable: the failure message needs it too.
    url = base_url + csv
    with session.get(url) as response:
        data = response.text
        if response.status_code != 200:
            print("FAILURE::{0}".format(url))
        # Return .csv data for future consumption
        return data
def get_data_synchronous():
    """Fetch the list of .csv files sequentially, printing a timing table.

    The table starts with a ``File`` / ``Completed at`` header. Each file is
    downloaded with ``fetch`` over one shared ``requests.Session``; its row
    shows the seconds elapsed since the first download began.
    """
    targets = [
        "ford_escort.csv",
        "cities.csv",
        "hw_25000.csv",
        "mlb_teams_2012.csv",
        "nile.csv",
        "homes.csv",
        "hooke.csv",
        "lead_shot.csv",
        "news_decline.csv",
        "snakes_count_10000.csv",
        "trees.csv",
        "zillow.csv",
    ]
    header = "{0:<30} {1:>20}".format("File", "Completed at")

    with requests.Session() as client:
        print(header)

        # Any session-wide settings (headers, authentication, ...) belong
        # here, before the first call to `fetch`.
        t0 = default_timer()
        for target in targets:
            fetch(client, target)
            seconds = default_timer() - t0
            finished_at = "{:5.2f}s".format(seconds)
            print("{0:<30} {1:>20}".format(target, finished_at))
def main():
    """Entry point: download all .csv files synchronously."""
    # Simple for now
    get_data_synchronous()


# Guard the call so the downloads run only when the file is executed
# directly, not when it is imported from another module.
if __name__ == "__main__":
    main()
Now we can make these requests run concurrently with the `asyncio` library!
First, a small change to `fetch`:
import requests
from timeit import default_timer
# We'll need access to this variable later
START_TIME = default_timer()
def fetch(session, csv):
    """Download one .csv file, print when it completed, and return its text.

    Args:
        session: Object with a ``get(url)`` method whose result works as a
            context manager and exposes ``text`` and ``status_code``.
        csv: File name appended to the base URL, e.g. ``"cities.csv"``.

    Returns:
        The response body as text, also when the status code is not 200 (a
        ``FAILURE::<url>`` line is printed in that case).

    The completion time is measured against the module-level ``START_TIME``.
    """
    base_url = "https://people.sc.fsu.edu/~jburkardt/data/csv/"
    # Build the URL once so the failure message below can report it.
    url = base_url + csv
    with session.get(url) as response:
        data = response.text
        if response.status_code != 200:
            print("FAILURE::{0}".format(url))
        # Now we will print how long it took to complete the operation from
        # the `fetch` function itself
        elapsed = default_timer() - START_TIME
        time_completed_at = "{:5.2f}s".format(elapsed)
        print("{0:<30} {1:>20}".format(csv, time_completed_at))
        return data
Next, we make the `get_data` function asynchronous:
import asyncio
from timeit import default_timer
from concurrent.futures import ThreadPoolExecutor
async def get_data_asynchronous():
    """Download every .csv file concurrently on a thread pool.

    Each blocking ``fetch`` call is handed to a ``ThreadPoolExecutor``
    through the event loop's ``run_in_executor``, and the coroutine waits
    until all of them have finished. ``fetch`` prints one row per file.
    """
    # `fetch` reads the module-level START_TIME. Without this declaration
    # the assignment further down would only bind a local variable and the
    # printed times would not be measured from the start of this run.
    global START_TIME

    csvs_to_fetch = [
        "ford_escort.csv",
        "cities.csv",
        "hw_25000.csv",
        "mlb_teams_2012.csv",
        "nile.csv",
        "homes.csv",
        "hooke.csv",
        "lead_shot.csv",
        "news_decline.csv",
        "snakes_count_10000.csv",
        "trees.csv",
        "zillow.csv",
    ]
    print("{0:<30} {1:>20}".format("File", "Completed at"))

    # Note: max_workers is set to 10 simply for this example,
    # you'll have to tweak with this number for your own projects
    # as you see fit
    with ThreadPoolExecutor(max_workers=10) as executor:
        # NOTE(review): this one session is used from all worker threads;
        # confirm that is acceptable for requests.Session in your setup.
        with requests.Session() as session:
            # Set any session parameters here before calling `fetch`

            # Inside a coroutine this returns the loop that is running it.
            loop = asyncio.get_event_loop()

            # Set the START_TIME for the `fetch` function
            START_TIME = default_timer()

            # Schedule one `fetch` call per file on the executor; positional
            # arguments after the callable are passed through to `fetch`.
            tasks = [
                loop.run_in_executor(executor, fetch, session, csv)
                for csv in csvs_to_fetch
            ]

            # Wait for every download; the returned texts are not used here.
            await asyncio.gather(*tasks)
The executor runs the `fetch` function for each .csv file that needs to be downloaded.
The `main` function needs a small tweak to properly initialize our async function:
def main():
    """Run ``get_data_asynchronous`` to completion on a fresh event loop."""
    # Create the loop explicitly: asyncio.get_event_loop() without a current
    # loop is deprecated and raises RuntimeError on recent Python versions.
    loop = asyncio.new_event_loop()
    try:
        # run_until_complete accepts the coroutine directly and wraps it in
        # a task on this loop.
        loop.run_until_complete(get_data_asynchronous())
    finally:
        # Release the loop's resources even if a download raised.
        loop.close()


# Only start the downloads when this file is executed as a script.
if __name__ == "__main__":
    main()
import requests
import asyncio
from concurrent.futures import ThreadPoolExecutor
from timeit import default_timer
START_TIME = default_timer()
def fetch(session, csv):
    """Request one .csv file, print its completion time, return its text.

    Args:
        session: Object providing ``get(url)``; the returned response is
            used as a context manager and must expose ``text`` and
            ``status_code``.
        csv: Name of the file to download, appended to the base URL.

    Returns:
        The text of the response. On a non-200 status ``FAILURE::<url>`` is
        printed and the text is still returned.

    The printed time is the number of seconds since the module-level
    ``START_TIME``.
    """
    base_url = "https://people.sc.fsu.edu/~jburkardt/data/csv/"
    # Keep the full URL in a variable: the failure message needs it too.
    url = base_url + csv
    with session.get(url) as response:
        data = response.text
        if response.status_code != 200:
            print("FAILURE::{0}".format(url))
        elapsed = default_timer() - START_TIME
        time_completed_at = "{:5.2f}s".format(elapsed)
        print("{0:<30} {1:>20}".format(csv, time_completed_at))
        return data
async def get_data_asynchronous():
    """Fetch all .csv files in parallel worker threads and wait for them.

    A header row is printed, then every file is submitted to a
    ``ThreadPoolExecutor`` via ``loop.run_in_executor``; ``fetch`` prints
    the completion row for each file as it finishes.
    """
    # START_TIME is the module-level value that `fetch` reads, so it has to
    # be declared global here; a plain assignment would create a local that
    # `fetch` never sees.
    global START_TIME

    csvs_to_fetch = [
        "ford_escort.csv",
        "cities.csv",
        "hw_25000.csv",
        "mlb_teams_2012.csv",
        "nile.csv",
        "homes.csv",
        "hooke.csv",
        "lead_shot.csv",
        "news_decline.csv",
        "snakes_count_10000.csv",
        "trees.csv",
        "zillow.csv",
    ]
    print("{0:<30} {1:>20}".format("File", "Completed at"))

    with ThreadPoolExecutor(max_workers=10) as executor:
        # NOTE(review): the session is shared by every worker thread;
        # verify that this is safe for the way requests.Session is used.
        with requests.Session() as session:
            # Set any session parameters here before calling `fetch`
            loop = asyncio.get_event_loop()

            # Restart the clock so times are relative to this run.
            START_TIME = default_timer()

            # run_in_executor forwards the extra positional arguments to
            # `fetch`, so no tuple unpacking is needed.
            tasks = [
                loop.run_in_executor(executor, fetch, session, csv)
                for csv in csvs_to_fetch
            ]

            # Block this coroutine until all downloads are done; the
            # gathered results are intentionally discarded.
            await asyncio.gather(*tasks)
def main():
    """Drive ``get_data_asynchronous`` on a newly created event loop."""
    # A new loop is created instead of calling asyncio.get_event_loop(),
    # which is deprecated (and fails on recent Python versions) when no
    # event loop is current.
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(get_data_asynchronous())
    finally:
        # Always close the loop, including when a download raised.
        loop.close()


# Run the downloads only when executed directly, not on import.
if __name__ == "__main__":
    main()