🈁 Strings and bytes
👇 Strings
>>> '"Where is our homework?"'
'"Where is our homework?"'
>>> "this" "weekend"
'thisweekend'
>>> """on
github"""
'on\ngithub'
.encode("utf-8")
🔄 .decode("utf-8")
>>> b"\00\42\24\00"
b'\x00"\x14\x00'
>>> rb"\00\42\24\00"
b'\\00\\42\\24\\00'
☝️ Bytes
💬 Escape sequences, raw strings
>>> print("\tell me")
        ell me
>>> print(r"\tell me")
\tell me
str
≈ unicode (a sequence of Unicode code points)
>>> s = "мем"
>>> ascii(s)
"'\\u043c\\u0435\\u043c'"
>>> repr(s)
"'мем'"
>>> list(s)
['м', 'е', 'м']
>>> s[0], type(s[0])
('м', <class 'str'>)
>>> list(s[0])
['м']
😮 Unicode 13.0 – 143 859 characters
…what are those for?
>>> ord("💅")
128133
>>> chr(128133)
'💅'
>>> chr(ord("💅"))
'💅'
🤔 How many bits (bytes) are used to represent a symbol?
hint: encoding
PEP-393
Universal Character Set
N bytes for each symbol
- UCS-1 ⬅️ ASCII / Latin-1 (max code < 2^8)
- UCS-2 ⬅️ max code < 2^16
- UCS-4 ⬅️ everything else
>>> list(map(ord, "hello"))
[104, 101, 108, 108, 111]
>>> list(map(ord, "привет"))
[1087, 1088, 1080, 1074, 1077, 1090]
>>> list(map(ord, "🧐🐍➡️🤬💀"))
[129488, 128013, 10145, 65039, 129324, 128128]
💨 Quick basic methods overview
1️⃣ Case modifiers
>>> "foo bar".capitalize()
'Foo bar'
>>> "foo bar".title()
'Foo Bar'
>>> "foo bar".upper()
'FOO BAR'
>>> "foo bar".lower()
'foo bar'
>>> "foo bar".title().swapcase()
'fOO bAR'
2️⃣ Nice output
>>> "foo bar".ljust(16, '~')
'foo bar~~~~~~~~~'
>>> "foo bar".rjust(16, '~')
'~~~~~~~~~foo bar'
>>> "foo bar".center(16, '~')
'~~~~foo bar~~~~~'
3️⃣ Removing symbols
>>> "]>>foo bar<<[".lstrip("]>")
'foo bar<<['
>>> "]>>foo bar<<[".rstrip("[<")
']>>foo bar'
>>> "]>>foo bar<<[".strip("[]<>")
'foo bar'
>>> "\t foo bar \r\n ".strip()
'foo bar'
4️⃣ Splitting strings
>>> "foo,,,bar".split(",")
['foo', '', '', 'bar']
>>> "foo,bar,baz".partition(",")
('foo', ',', 'bar,baz')
5️⃣ Joining strings
>>> ", ".join(["foo", "bar", "baz"])
'foo, bar, baz'
>>> ", ".join(filter(None, ["", "foo"]))
'foo'
>>> ", ".join("bar")
# ???
6️⃣ Finding things
>>> "foo" in "foobar"
True
>>> "kek" not in "foobar"
True
>>> "foobar".startswith("foo")
True
>>> "foobar".endswith(("boo", "bar"))
True
>>> "foobar".find("ob")
2
>>> "ob".index("kek")
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ValueError: substring not found
>>> "boo" in b"foobar"
# ???
>>> b"foobar".replace("o", "")
# ???
7️⃣ Replacing things
>>> "clubclubhouse".replace("club", "**", 1)
'**clubhouse'
>>> translation_map = {
ord("C"): "B",
ord("L"): "E",
ord("U"): "E",
ord("B"): "R"
}
>>> "CLUBhouse".translate(translation_map)
'BEERhouse'
8️⃣ Useful predicates
>>> "100500".isdigit()
True
>>> "foo100500".isalnum()
True
>>> "foobar".isalpha()
True
>>> "foobar".islower()
True
>>> "FOOBAR".isupper()
True
>>> "Foo Bar".istitle()
True
>>> "\r \n\t \r\n".isspace()
True
9️⃣ Formatting
>>> cost = 300
>>> f"You owe me {cost} bucks"
'You owe me 300 bucks'
>>> f"{3000000:,}"
'3,000,000'
>>> f"{3000000:~^11}"
'~~3000000~~'
>>> "{0}, {1}, {0}".format("hello", "kitty")
'hello, kitty, hello'
>>> "{0}, {who}, {0}".format("hello", who="kitty")
'hello, kitty, hello'
FYI: you don’t need to remember formatting symbols (use docs!)
Formatting: please, don’t
>>> "%s, %s, how are you?" % ("Hello", "Sally")
'Hello, Sally, how are you?'
- % is a binary operator; several values must be packed into a tuple (or a dict)
- every element is used only once
- problems with long numbers
import string
>>> string.ascii_letters
'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
>>> string.digits
'0123456789'
>>> string.punctuation
'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
>>> string.whitespace
' \t\n\r\x0b\x0c'
Strings and bytes wrap-up
- strings are sequences of codepoints (Unicode sequences), symbol is a string too
- cool methods for bytes and strings
- Universal Character Set: N bytes for symbol (1, 2, 4)
- don’t use %, use f"" and "".format()
- do not confuse bytes and strings
📂 Files and IO
🚀 Files opening
>>> open("./memes.txt") # text
<[...] name='./memes.txt' mode='r' encoding='UTF-8'>
modes: “r”, “w”, “x”, “a”, “+”, “b”, “t”.
>>> open("./binary.db", "r+b") # ???
<_io.BufferedRandom name='./binary.db'>
Reading and writing
>>> handle = open("./memes.txt")
>>> handle.read(13)
'HEREWEGOAGAIN'
>>> handle.readline(13)
'HEREWEGOAGAIN'
>>> handle.close()
>>> handle = open("./example.txt", "w")
>>> handle.write("abracadabra")
11 # characters written
Other functions
>>> handle = open("./example.txt", "r+")
>>> handle.fileno()
3 # ???
>>> handle.tell()
0
>>> handle.seek(8)
>>> handle.tell()
8
>>> handle.write("something unimportant")
>>> handle.flush()
>>> handle.close()
🎶 Three default text files
>>> input("Name: ") # read from sys.stdin
Name: Floppa
'Floppa'
>>> print("Hello, `sys.stdout`!", file=sys.stdout)
Hello, `sys.stdout`!
>>> print("Hello, `sys.stderr`!", file=sys.stderr)
Hello, `sys.stderr`!
🆕 from pathlib import Path
>>> Path(".") / "hello.txt"
PosixPath('hello.txt')
>>> p = Path("hello.txt")
>>> p.parent
PosixPath('.')
>>> p.resolve()
PosixPath('/Users/tv/hello.txt')
>>> p.write_text("hello")
5
>>> p.read_text()
'hello'
>>> p.unlink()
>>> p.read_text()
...
FileNotFoundError: [Errno 2] No such file or directory: 'hello.txt'
🔥 Files and IO wrap-up
- files can be texts or bytes
- methods for reading and writing are typical
- Python has stdin, stdout and stderr – text files
- methods are typical, pathlib is useful
Collections
set
>>> xs, ys, zs = {1, 2}, {2, 3}, {3, 4}
>>> set.union(xs, ys, zs) # xs | ys | zs
{1, 2, 3, 4}
>>> set.intersection(xs, ys, zs) # xs & ys & zs
set()
>>> set.difference(xs, ys, zs) # xs - ys - zs
{1}
>>> xs <= ys # xs ⊆ ys
False
>>> xs < xs # xs ⊂ xs
False
>>> xs | ys >= xs # xs ∪ ys ⊇ xs
True
>>> seen = set()
>>> seen.add(42)
>>> seen
{42}
>>> seen.update([], [1], [2], [3])
>>> seen
{1, 2, 42, 3}
>>> seen.remove(3)
>>> seen
{1, 2, 42}
>>> seen.remove(100500) # ???
>>> {set(), set()} # ???
frozenset
Supports all set operations except adding/removing elements.
>>> {frozenset(), frozenset()}
{frozenset()}
tuple
>>> p = (5,) # ???
>>> date = "October", 5
>>> person = ("George", "Carlin", "May", 12, 1937)
>>> name, birthday = person[:2], person[2:]
>>> name
('George', 'Carlin')
>>> NAME, BIRTHDAY = slice(2), slice(2, None)
>>> NAME.start, NAME.stop, NAME.step
(None, 2, None)
>>> person[BIRTHDAY]
('May', 12, 1937)
namedtuple
>>> from collections import namedtuple
>>> Person = namedtuple("Person", ["name", "age"])
>>> p = Person("George", age=77)
>>> p._fields
('name', 'age')
>>> p.name, p.age
('George', 77)
>>> p._asdict()
{'name': 'George', 'age': 77}
>>> p._replace(name="Bill")
Person(name='Bill', age=77)
list
>>> [0] * 2
[0, 0]
>>> chunks = [[0]] * 2
>>> chunks
[[0], [0]]
>>> chunks[0][0] = 42
>>> chunks
[[42], [42]] # ???
>>> xs = [3, 2, 1]
>>> xs.sort(key=lambda x: x % 2, reverse=True) # timsort
>>> xs
[3, 1, 2]
>>> xs = [1, 2, 3]
>>> xs.append(42) # ==> [1, 2, 3, 42]
>>> xs.extend({-1, -2}) # ==> [1, 2, 3, 42, -2, -1]
>>> xs = [1, 2, 3]
>>> xs.insert(0, 4) # ==> [4, 1, 2, 3]
>>> xs.insert(-1, 42) # ==> [4, 1, 2, 42, 3]
>>> xs = [1, 2, 3]
>>> xs[:2] = [0] * 2 # ==> [0, 0, 3]
>>> xs = [1, 2, 3]
>>> del xs[:2] # ==> [3]
>>> xs = [1, 2, 3]
>>> xs.pop(1)
2 # sometimes it's useful
>>> xs = [1, 1, 0]
>>> xs.remove(1)
# ???
tuple and list
- reversed()
- concatenation: x + y (always a new object)
>>> xs, ys = [1, 2], [3]
>>> id(xs), id(ys)
(4416336136, 4416336008)
>>> id(xs + ys)
4415376136
but inplace concatenation in list!
>>> xs += ys # ≈ xs.extend(ys)
>>> id(xs)
4416336136
by the way…
>>> xs = []
>>> def f():
xs += [42]
...
>>> f() # ???
dict
>>> d = dict(foo="bar")
>>> dict(d) # copy
{'foo': 'bar'}
>>> dict(d, boo="baz") # copy with new keys
{'foo': 'bar', 'boo': 'baz'}
>>> dict.fromkeys("abcd", 0)
{'a': 0, 'b': 0, 'c': 0, 'd': 0}
>>> d = dict.fromkeys(["foo", "bar"], 42)
>>> d.keys()
dict_keys(['foo', 'bar'])
>>> d.values()
dict_values([42, 42])
>>> d.items()
dict_items([('foo', 42), ('bar', 42)])
>>> for k in d:
del d[k]
...
# ???
>>> len(d.items())
2
>>> 42 in d.values()
True
>>> d.keys() & {"foo"}
{'foo'}
>>> {v for v in d.values()}
{42}
>>> d = {"foo": "bar"}
>>> d["foo"]
'bar'
>>> d["boo"]
# ???
>>> d = {"foo": "bar"}
>>> d["foo"]
'bar'
>>> d["boo"]
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
KeyError: 'boo'
>>> d.get("boo", 42)
42
>>> d = {}
>>> d.update([("foo", "bar")], boo=42)
>>> d
{'foo': 'bar', 'boo': 42}
>>> del d["boo"]
>>> d.pop("foo")
'bar'
>>> d.clear()
>>> d
{}