error handling
import traceback
try:
something
except:
traceback.print_exc()
Assignment Expresions
allow return value in assignment
any((ret := n) % 7 == 0 for n in nums)
lambda function
add = lambda a,b,c : a + b + c
print( add(5,4,6) )
map function
map(function you want to apply, sequence of elements we want to apply it to)
def interest(amount):
rate = 5
year = 3
return amount * rate * year / 100
amount = [10000, 12000, 15000]
interest_list = list( map(interest,amount) )
print( interest_list )
filter function
filter(function that checks for a condition, sequence of elements we want to apply it to)
def eligibility(age):
if(age>=24):
return True
list_of_age = [10, 24, 27, 33, 30, 18, 17, 21, 26, 25]
age = filter(eligibility, list_of_age)
print(list(age))
#combine filter with lambda
numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
even = list(filter(lambda x: x%2==0, numbers))
print(even)
pipe
filter with where
from pipe import where
arr = [1,2,3,4,5]
list(arr | where(lambda x. x%2==0))
apply func to iterable
from pipe import select
arr = [1,2,3,4]
list(arr | select(lambda x: x*2))
#or combine where and select
list(arr
| where(lambda x: x%2==0)
| select(lambda x: x*2))
unfold iterable
from pipe import traverse, select
fruits = [
{"name":"apple", "price":[2,5]},
{"name":"orange", "price":4},
{"name":"grape", "price":5}
]
list(fruits
| select(lambda fruit:fruit["price"])
| traverse)
reduce function
reduce(function, iterable[, initializer])
from functools import reduce
def add(a,b):
return a+b
list = [1, 2, 3, 4, 5]
sum = reduce(add, list)
print(list(sum))
destructuring
#a=[1,2], b=[3,4]
[a, b] = [[1,2],[3,4]]
# a=('x',1), b=('y', 2)
myDict={'x':1, 'y':2}
a,b=myDict.items()
#enumerate
myList = ["a", "b", "c"]
for i, element in enumerate(myList):
print(i, element)
#asterisk
def sum(a, b, c):
return a + b + c
x = (1, 2, 3)
print(result(*x))
#*_ omit an unknown number of values
#first is 'H', last is 'o'
first, *_, last = "Hello"
Get Method for Dictionaries
with get method instead of direct indexing it is possible to get an replaced value rather than error if key does not existss
dictionary.get('three', False)
Tree Datatypes
create nested dict
class Tree(dict):
def__missing__(self, key):
value = self[key] = type(self)()
return value
#example
tree = Tree()
tree['carnivora']['canis']['c.lupus'] = 'c.l.familiaris'
tree['carnivora']['felis'] = 'f.catus'
print(tree)
list indexing
list indexing can be used in following way:
list[start🔚step]
a named slices is more advanced for this kind of indexing
x = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
new_slice = slice(3, 8, 2)
x[new_slice]
f-string
#old method
print("Happy %s, %s. Welcome to Python!" % (day, name))
#str.format() method
print("Happy {}, {}. Welcome to Python!".format(day, name))
#f-string, recommended to use
name = "Monty"
day = "Tuesday"
print(f"Happy {day}, {name}. Welcome to Python!")
list comprehension
new_list = [expression for item in iterable (if conditional)]
create N-length lists
#[None, None, None, None]
four_nones = [None] * 4
#[[], [], [], []]
four_lists = [[] for __ in range(4)]
delete element of lists
In short, don’t use for loops when you’re deleting items from a list, rather use list comprehension
foos = [value for value in a if value != 'bar']
Unpack arguments with *, **, and _
- A variable beginning with * can hold as any number of elements
#1st example long_list = [x for x in range(100)] a, b, *c, d, e, f = long_list #2nd example def printfunction(*args): print(args)
- ** operator can unpack dictionaries to a function
def myfriendsfunction(name, age, profession): ... print("Name: ", name) ... print("Age: ", age) ... print("Profession: ", profession) friendanne = {"name": "Anne", "age": 26, "profession": "Senior Developer"} myfriendsfunction(**friendanne)
dir() return all attributes and methods of an object
sys.getsizeof() get memory usage of a variable
split lines with () rather thatn \
big_string = (
"This is the beginning of a really long story. "
"It's full of magicians, dragons and fabulous creatures. "
"Needless to say, it's quite scary, too."
)
use type hints
def sayhello(day:str, name:str) -> str:
return f"Happy {day}, {name}. Welcome to Python!"
Ternary Expression
define variables with particular values based on the conditions
reward = "1000 dollars" if score > 90 else "500 dollars"
Evaluate Multiple Conditions
# Do these instead
if all([a < 10, b > 5, c == 4]):
# do something
if any([a < 10, b > 5, c == 4]):
# do something
Use Counter for unique Element Counting
from collections import Counter
word_counter = Counter(x.lower() for x in words)
#find the most frequently occurring element
print("Most Frequent:", word_counter.most_common(1))
sorting
#sort list
sorted(numbers, reverse=True)
sorted(words, reverse=True)
#sort list of tuples
grades = [('John', 95), ('Aaron', 99), ('Zack', 97), ('Don', 92), ('Jennifer', 100), ('Abby', 94), ('Zoe', 99), ('Dee', 93)]
# Sort by the grades, descending
sorted(grades, key=lambda x: x[1], reverse=True)
# Sort by the name's initial letter, ascending
sorted(grades, key=lambda x: x[0][0])
# sort with two keys
sorted(grades, key=lambda x: (x[0][0], -x[1]))
defaultdict
defaultdict can avoid key not exist error when putting list or tuple in dict
from collections import defaultdict
final_defaultdict = defaultdict(list)
for letter in letters:
final_defaultdict[letter].append(letter)
any/all
# finds an element that evalutes to True it can return True immediately
found = any(thing == other_thing for thing in things)
# return True only when all the elements evaluate to True
found = all(thing == other_thing for thing in things)
replace list() with []/ replace dict() with {}
time performance is better
Replace if statement with if expression
x = 1 if condition else 2
Use items() to directly unpack dictionary values
hats_by_colour = {'blue': ['panama', 'baseball_cap']}
for hat_colour, hats in hats_by_colour.items():
if hat_colour in self.favourite_colours:
think_about_wearing(hats)
Simplify sequence comparison
check whether a list or sequence has elements before we try and do something with it
if len(list_of_hats) > 0:
#can be shortened as
if list_of_hats:
Replace multiple comparisons of same variable with in operator
def process_payment(payment):
if payment.currency == 'USD' or payment.currency == 'EUR':
process_standard_payment(payment)
#can be shortened into
def process_payment(payment):
if payment.currency in ['USD', 'EUR']:
process_standard_payment(payment)
data class
this feature is for python 3.7+
from dataclasses import dataclass
@dataclass
class DataClassCard:
rank: str
suit: str
queen_of_hearts = DataClassCard('Q', 'Hearts')
queen_of_hearts.rank
parallel computing
processpoolexecutor
using concurrent.futures to process tasks with more processors
with concurrent.futures.ProcessPoolExecutor() as executor:
executor.map(func, args_for_func)
Pool
from multiprocessing.dummy import Pool as ThreadPool
# Make the Pool of workers
pool = ThreadPool(4)
# Open the urls in their own threads
# and return the results
results = pool.map(urllib2.urlopen, urls)
#close the pool and wait for the work to finish
pool.close()
pool.join()
concurrent.futures.ProcessPoolExecutor()
ppserver
celery
Interning
Interning is re-using the objects on-demand instead of creating the new objects. is — this is used to compare the memory location of two python objects. id — this returns memory location in base-10.
joblib
caching results
from joblib import Memory
#Define a location to store cache
location = '~/Desktop/temp/cache_dir'
memory = Memory(location, verbose=0)
result = []
#Function to compute square of a range of a number:
def get_square_range_cached(start_no, end_no):
for i in np.arange(start_no, end_no):
time.sleep(1)
result.append(square_number(i))
return result
get_square_range_cached = memory.cache(get_square_range_cached)
start = time.time()
final_result = get_square_range_cached(1, 21)
end = time.time()
#Clean-up the cache folder
memory.clear(warn=False)
parallelization
from joblib import Parallel, delayed
from joblib import Memory
location = 'C:/Users/pg021/Desktop/temp/cache_dir'
memory = Memory(location, verbose=0)
costly_compute_cached = memory.cache(costly_compute)
def data_processing_mean_using_cache(data, column):
"""Compute the mean of a column."""
return costly_compute_cached(data, column).mean()
start = time.time()
results = Parallel(n_jobs=2)(
delayed(data_processing_mean_using_cache)(data, col)
for col in range(data.shape[1]))
stop = time.time()
print('Elapsed time for the entire processing: {:.2f} s'
.format(stop - start))
dump results
from joblib import dump, load
start = time.time()
joblib_file = 'train_features.joblib'
with open(path + joblib_file, 'wb') as f:
dump(data, f)
# Calculating the total time
simple_joblib_duration = time.time() - start
print("Dump duration: %0.3fs" % simple_joblib_duration)
start = time.time()
with open(path + joblib_file, 'wb') as f:
load(data, f)
# Calculating the total time
simple_joblib_duration = time.time() - start
print("Dump duration: %0.3fs" % simple_joblib_duration)
dump with compression
start = time.time()
joblib_file = '/train_features.joblib'
# Dumping the file in the zlib compression format
with open(path + joblib_file, 'wb') as f:
dump(data, f, compress='zlib')
simple_joblib_duration = time.time() - start
# Total time taken to dump
print("Zlib dump duration: %0.3fs" % simple_joblib_duration)
plydata
#example data
import pandas as pd
from plydata import define, query, if_else, ply
df = pd.DataFrame({
'x': [0, 1, 2, 3],
'y': ['zero', 'one', 'two', 'three']})
define
define(data, *args,**kwargs)
#df won't be changed(no insert)
define(df, z='x')
#it equals to
df >> define(z='x')
#for multi operations
(df
>> define(m='2*x')
>> define(n='m*m')
>> define(q='m+n')
)
if_else
if_else(predicate, true_value, false_value)
define(df, z=if_else('x>1', 1, 0))
df >> define(z=if_else('x>1', 1, 0))
query
query(data, expr)
df >> query('z==1')
ply
ply is pipe operator, equal to »
(df
>> define(z=if_else('x>1', 1, 0))
>> query('z==1')
)
#is equal to
ply(df,
define(z=if_else('x > 1', 1, 0)),
query('z == 1')
)
async & Await
feature for python >=3.5
async def ping_server(ip):
# ping code here...
async def ping_local():
return await ping_server('192.168.1.1')
divmod
performs a modulus division % on two numbers, then returns both the quotient and remainder
casefold
casefold provides standardize, more aggressively, a wider range of characters to lower case. in most cases it is same as lower, but in some cases not
"ς".casefold() # returns "σ"
"ς".lower() # returns "ς"
For/Else Statements
else will run if no break occurs
for x in range(3):
if x ==4 :
break
else:
print('We never broke out')
Queue Module
there are three types of Queue: FIFO, LIFO, and Priority Queue.
1) FIFO
import queue
# queue.Queue()
q = queue.Queue() for i in range(5): q.put(i) while not q.empty(): print(q.get())
# result: 0,1,2,3,4
# queue.SimpleQueue()
simple_q = queue.SimpleQueue() for i in range(5): simple_q.put(i) while not simple_q.empty(): print(simple_q.get())
# result: 0,1,2,3,4
q = queue.Queue(maxsize=3) try: for i in range(5): q.put(i, block=False) except queue.Full: print(“Queue is Full with 3 items.”) try: for i in range(5): print(f”element {q.get(block=False)}”) except queue.Empty: print(“Queue is already empty”) 2) LIFO LIFO is also named as stack.
import queue
q = queue.LifoQueue() for i in range(5): q.put(i)
while not q.empty(): print(q.get())
# result: 4,3,2,1,0
q = queue.LifoQueue(maxsize=3) try: for i in range(5): q.put(i, block=False) except queue.Full: print(“Queue is Full with 3 items”) try: for i in range(5): print(f”element {q.get(block=False)}”) except queue.Empty: print(“Queue is already empty”) 3) Priority Queue priority queue uses min heap queue algorithm. priority queue not only work with numbers but also complex data types like tuple or customized classes as long as the objects are comparable. dataclass with config order=True can make complex data struct comparable
import queue q = queue.PriorityQueue()
for i in [4,1,3,2,0]: q.put(i) while not q.empty(): print(q.get())
#complex struct from dataclasses import dataclass from typing import Any
@dataclass(order=True) class Item: key: int value: Any
q = queue.PriorityQueue()
for i in [Item(3,”leiden”),Item(1,”amsterdam”),Item(2,”rotterdam”),Item(1,”utrecht”)]: q.put(i) while not q.empty(): print(q.get())
user case
Queue is designed for multi-threading with following characteristics:
- thread-safe characteristic
- avoid potential memory explosion
-
reduce busy waiting
import queue import threading import random import requests
def download(queue): id = queue.get() result = requests.get(f”https://jsonplaceholder.typicode.com/photos/{id}”) url = result.json()[“thumbnailUrl”] save_image(id, url) print(f”Save image {id}”) queue.task_done() # this is new
NUM_THREADS = 10 q = queue.Queue()
for i in range(NUM_THREADS): worker = threading.Thread(target=download,args=(q,)) worker.start()
for i in range(NUM_THREADS): id = random.randint(1,100) q.put(id)
q.join()
numpy
broadcasting
if numpy operate on two array of different size, the smaller array becomes broadcast across teh larger array if the dimension of smaller array is 1. otherwise exception is thrown
slicing
array slicing is one shallow copy of original array
originalArray= np.arange(0,10)
#subArray is only a reference to original array
subArray=originalArray[5:]
linspace
#endpoint is boolean whether to include the stop num or not
np.linspace(start, stop, num, endpoint)
size and type
array= np.arange(0,10)
array.size
array.dtype
array.shape
array.ndim
cache value
from functools import lru_cache
@lru_cache(maxsize=1000)
def fibonacci(input_value):
if input_value ==1:
return 1
elif input_value == 2:
return 1
elif input_value > 2:
return fibonacci(input_value-1)+fibonacci(input_value-2)
load local env
create .env file in local path and save local env viriables in this file, e.g. GG_API_KEY=INSERT API TOKEN. To load these env variable in python environments:
from dotenv import load_dotenv
load_dotenv()
new features in python 3.8.5
Assignment operator ( := )
A new syntax := that assigns values to variables as part of a larger expression
if (n := len(a)) > 10:
print(f"List is too long ({n} elements, expected <= 10)")
Positional-only parameters ( / )
#a and b are positional-only parameters,c and d can be positional or keyword, and e and f are required to be keywords
def (a,b,/,c,d,*,e,f)
print(a,b,c,d,e,f)