分布式session

自定义session

点击查看自定义session

一致性哈希和分布式session

将session保存在redis缓存中,可以获得更高的性能。如果有多台缓存服务器,就需要对服务器作负载均衡,将session分发到每台服务器上。实现负载均衡的算法有很多,最常用的是哈希算法,它的基本逻辑是,对session_id(随机字符串)进行哈希,哈希结果再按服务器数量进行取模运算,得到的余数i就是第i个服务器。

一致性哈希(Consistent Hashing)是分布式负载均衡的首选算法。python中有实现模块hash_ring,不需要安装,直接将其中的单文件hash_ring.py拿来用即可,也就100多行代码。一致性哈希除了可以用在这里,也可以用于作分布式爬虫。
一致性哈希:输入随机字符串,算出固定的哈希值,再在哈希环上找到第一个大于该哈希值的虚拟节点(超出末尾则回到环的起点),拿到该节点对应的服务器。与直接取余不同,增减服务器时只有少量key的映射会改变。

hash_ring.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
"""
hash_ring
~~~~~~~~~~~~~~
Implements consistent hashing that can be used when
the number of server nodes can increase or decrease (like in memcached).

Consistent hashing is a scheme that provides a hash table functionality
in a way that the adding or removing of one slot
does not significantly change the mapping of keys to slots.

More information about consistent hashing can be read in these articles:

"Web Caching with Consistent Hashing":
http://www8.org/w8-papers/2a-webserver/caching/paper2.html

"Consistent hashing and random trees:
Distributed caching protocols for relieving hot spots on the World Wide Web (1997)":
http://citeseerx.ist.psu.edu/legacymapper?did=38148


Example of usage::

    cache_servers = [
        '192.168.0.246:6379',
        '192.168.0.247:6379',
        '192.168.0.249:6379'
    ]

    # Configure the weights
    weights = {
        '192.168.0.246:6379': 2,
        '192.168.0.247:6379': 2,
        '192.168.0.249:6379': 1
    }
    ring = HashRing(cache_servers, weights)

    print(ring.get_node("suijizifuchuan"))


:copyright: 2008 by Amir Salihefendic.
:license: BSD
"""

import math
import sys
from bisect import bisect

# Pick the MD5 constructor: ``hashlib`` on Python 2.5 and later, the legacy
# ``md5`` module on older interpreters. The rest of the module only ever calls
# ``md5_constructor()`` and never touches either module directly.
if sys.version_info >= (2, 5):
    import hashlib

    md5_constructor = hashlib.md5
else:
    import md5

    md5_constructor = md5.new


class HashRing(object):
    """Consistent-hash ring that maps string keys onto a set of nodes.

    Each node is placed on the ring many times (virtual points), in
    proportion to its weight. A key is hashed to a 32-bit position and is
    served by the first virtual point located after that position, wrapping
    around to the start of the ring when the position is past the last point.
    """

    def __init__(self, nodes=None, weights=None):
        """`nodes` is a list of objects that have a proper __str__ representation.
        `weights` is dictionary that sets weights to the nodes. The default
        weight is that all nodes are equal.

        Omitting `nodes` (or passing ``None``) builds an empty ring.
        """
        self.ring = dict()
        self._sorted_keys = []

        # Fall back to an empty list so that HashRing() yields an empty ring
        # instead of failing while iterating over None.
        self.nodes = nodes if nodes is not None else []

        if not weights:
            weights = {}
        self.weights = weights

        self._generate_circle()

    def _generate_circle(self):
        """Populate ``self.ring`` and ``self._sorted_keys`` with virtual points.

        A node receives ``floor(40 * len(nodes) * weight / total_weight)``
        digests, and every digest contributes three ring positions, so with
        equal weights each node owns 120 points.
        """
        total_weight = 0
        for node in self.nodes:
            total_weight += self.weights.get(node, 1)

        for node in self.nodes:
            # Nodes missing from the weights mapping count with weight 1.
            weight = self.weights.get(node, 1)

            factor = math.floor((40 * len(self.nodes) * weight) / total_weight)

            for j in range(0, int(factor)):
                b_key = self._hash_digest('%s-%s' % (node, j))

                # Carve three 4-byte windows (offsets 0, 4 and 8) out of the
                # digest; each window becomes one position on the ring.
                for i in range(0, 3):
                    key = self._hash_val(b_key, lambda x: x + i * 4)
                    self.ring[key] = node
                    self._sorted_keys.append(key)

        self._sorted_keys.sort()

    def get_node(self, string_key):
        """Given a string key a corresponding node in the hash ring is returned.

        If the hash ring is empty, `None` is returned.
        """
        pos = self.get_node_pos(string_key)
        if pos is None:
            return None
        return self.ring[self._sorted_keys[pos]]

    def get_node_pos(self, string_key):
        """Given a string key, return the index in the sorted ring positions
        of the virtual point that serves it.

        If the hash ring is empty, `None` is returned.
        """
        if not self.ring:
            return None

        key = self.gen_key(string_key)

        nodes = self._sorted_keys
        pos = bisect(nodes, key)

        # A key hashing past the last point wraps around to the first one.
        if pos == len(nodes):
            return 0
        return pos

    def iterate_nodes(self, string_key, distinct=True):
        """Given a string key it returns the nodes as a generator that can hold the key.

        The generator iterates one time through the ring
        starting at the correct position.

        if `distinct` is set, then the nodes returned will be unique,
        i.e. no virtual copies will be returned. Otherwise one node is
        yielded for every virtual point on the ring.

        If the hash ring is empty, the single value ``(None, None)`` is
        yielded.
        """
        if not self.ring:
            yield None, None
            return

        pos = self.get_node_pos(string_key)
        # One full trip around the ring, starting at the key's position.
        ordered_keys = self._sorted_keys[pos:] + self._sorted_keys[:pos]

        # Nodes are compared through their string form when de-duplicating.
        returned_values = set()
        for key in ordered_keys:
            node = self.ring[key]
            if distinct:
                marker = str(node)
                if marker in returned_values:
                    continue
                returned_values.add(marker)
            yield node

    def gen_key(self, key):
        """Given a string key it returns a long value,
        this long value represents a place on the hash ring.

        md5 is currently used because it mixes well.
        """
        b_key = self._hash_digest(key)
        return self._hash_val(b_key, lambda x: x)

    def _hash_val(self, b_key, entry_fn):
        """Pack four digest bytes into one 32-bit integer.

        `entry_fn` maps the indices 0..3 to positions inside `b_key`; the
        byte at ``entry_fn(0)`` becomes the least significant byte.
        """
        return ((b_key[entry_fn(3)] << 24)
                | (b_key[entry_fn(2)] << 16)
                | (b_key[entry_fn(1)] << 8)
                | b_key[entry_fn(0)])

    def _hash_digest(self, key):
        """Return the MD5 digest of the string `key` as a list of ints."""
        m = md5_constructor()
        m.update(key.encode('utf-8'))
        # Iterating over bytes yields ints on Python 3 (Python 2 would have
        # needed ``map(ord, ...)`` here).
        return list(m.digest())

分布式session

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import tornado.ioloop
import tornado.web
from hashlib import sha1
import os
import time
import redis
from .hash_ring import HashRing

# Cache servers that session data is distributed over, as "host:port".
cache_servers = [
    "192.168.0.246:6379",
    "192.168.0.247:6379",
    "192.168.0.249:6379",
]

# Weight of each server; a larger weight gives the server a larger share
# of the hash ring.
weights = {
    "192.168.0.246:6379": 2,
    "192.168.0.247:6379": 4,
    "192.168.0.249:6379": 1,
}

ring = HashRing(cache_servers, weights) # Instantiate the HashRing object (module level; Session looks servers up through it)

def create_session_id():
    """Return a freshly generated random session id.

    The id is the 40-character hexadecimal SHA-1 digest of 16 bytes from
    ``os.urandom`` (formatted through ``%s``) followed by the current
    timestamp.
    """
    seed = '%s%s' % (os.urandom(16), time.time())
    return sha1(bytes(seed, encoding='utf-8')).hexdigest()


class Session:
    """Custom session whose values are stored in Redis.

    The session id travels in the ``session_id`` cookie. The Redis server
    holding a session is chosen by looking the id up in the module-level
    ``ring``, and the session's values are kept in a Redis hash named after
    the id.
    """

    # Registry of the session ids issued by this class.
    # NOTE(review): this dict lives in process memory and is never pruned, so
    # an id issued by another process or before a restart is treated as
    # unknown and replaced - confirm this is acceptable for a multi-server
    # deployment.
    info_container = {
        # session_id: {'user': info} --> user info, permissions, etc. kept via the session
    }

    # "host:port" -> redis.Redis client, so the client for a server is
    # created once and then reused instead of being rebuilt on every access.
    _clients = {}

    def __init__(self, handler):
        """Bind the session to a request and make sure it has a session id.

        :param handler: object used for the cookie operations; it must offer
            ``get_cookie()`` and ``set_cookie()`` (presumably a tornado
            RequestHandler - verify against the caller).
        """
        self.handler = handler

        # Read the random string used as session_id from the cookie; create a
        # new id when the cookie is missing or the id is not registered.
        random_str = self.handler.get_cookie('session_id')
        if (not random_str) or (random_str not in self.info_container):
            random_str = create_session_id()
            self.info_container[random_str] = {}
        self.random_str = random_str

        # The cookie is set again every time a Session is created, so its
        # expiry is pushed back to 60 seconds from now on each request.
        self.handler.set_cookie('session_id', random_str, max_age=60)

    def _connection(self):
        """Return the Redis client of the server that owns this session."""
        # get_node() picks the server from the hash of the session id.
        node = ring.get_node(self.random_str)
        client = self._clients.get(node)
        if client is None:
            # Split on the last colon only, so a host containing colons
            # still separates cleanly from its port.
            host, port = node.rsplit(':', 1)
            client = redis.Redis(host=host, port=int(port))
            self._clients[node] = client
        return client

    def __getitem__(self, item):
        """Return the value stored under `item` in this session's Redis hash."""
        return self._connection().hget(self.random_str, item)

    def __setitem__(self, key, value):
        """Store `value` under `key` in this session's Redis hash."""
        self._connection().hset(self.random_str, key, value)

    def __delitem__(self, key):
        """Remove `key` from this session's Redis hash."""
        self._connection().hdel(self.random_str, key)

    def delete(self):
        """Remove the session id from ``info_container``.

        Calling it again for an id that is already gone does nothing.
        """
        # NOTE(review): the values stored in Redis are left in place and no
        # expiry is ever set on them - confirm whether they should be removed
        # here as well.
        self.info_container.pop(self.random_str, None)