-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCloudie.py
117 lines (104 loc) · 4.3 KB
/
Cloudie.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
'''
This is a modification of Andy's word cloud generator
(Somehow he always ends up helping me :P)
Stumbled upon it while reading his blog
http://peekaboo-vision.blogspot.in/
'''
import random
import numpy as np
#from __future__ import division
from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont
from query_integral_image import query_integral_image
# Hard-coded, system-specific path to the TrueType font file used to render
# every word; change it if the font is not installed at this location.
FONT_PATH = "/usr/share/fonts/truetype/ttf-japanese-gothic.ttf"
def _measure_text(draw, word, font):
    """Return the (width, height) in pixels that *word* occupies in *font*."""
    if hasattr(draw, "textsize"):
        return draw.textsize(word, font=font)
    # Newer Pillow releases dropped ``textsize``; the right/bottom edge of the
    # bounding box anchored at the origin is the equivalent extent.
    _, _, right, bottom = draw.textbbox((0, 0), word, font=font)
    return right, bottom


def make_wordcloud(words, counts, fname, width=800, height=400,
                   margin=5, ranks_only=False):
    """Lay out *words* as a word cloud and save the image to *fname*.

    Words are placed from the highest count to the lowest.  Each word is
    drawn, randomly either horizontal or rotated by 90 degrees, at a free
    position reported by ``query_integral_image``; when no position is free
    the font size is reduced by one and the search is retried.  Layout stops
    as soon as a word cannot be placed at any positive font size, so the
    saved image may contain only the most frequent words.

    Parameters
    ----------
    words : sequence of str
        The words to draw.
    counts : sequence of numbers
        One weight per word, in the same order as *words*.
    fname : str
        Output path, handed to ``Image.save``.
    width, height : int
        Size of the generated image in pixels.
    margin : int
        Extra pixels added to each text box while searching for free space.
    ranks_only : bool
        When false, the font size of each word is additionally capped at
        ``int(100 * log(count + 100))`` of its normalized count.  When true,
        the size only ever shrinks as space runs out, so only the rank of a
        word influences its size.

    Raises
    ------
    ValueError
        If *counts* is empty.
    """
    if len(counts) <= 0:
        # Fail with the intended message rather than letting an empty
        # sequence blow up later inside max().
        raise ValueError(
            "We need at least 1 word to plot a word cloud, got %d."
            % len(counts))

    font_path = FONT_PATH

    # Accept plain lists as well as arrays: fancy indexing below needs arrays.
    words = np.asarray(words)
    counts = np.asarray(counts)

    # normalize counts (skipped when the maximum is 0 to avoid dividing by 0)
    max_count = float(counts.max())
    if max_count != 0:
        counts = counts / max_count

    # sort words by count, largest first
    inds = np.argsort(counts)[::-1]
    counts = counts[inds]
    words = words[inds]

    # The layout is done on a greyscale canvas; ``integral`` is its running
    # 2-D cumulative sum, indexed as [row, column].
    img_grey = Image.new("L", (width, height))
    draw = ImageDraw.Draw(img_grey)
    integral = np.zeros((height, width), dtype=np.uint32)

    font_sizes, positions, orientations = [], [], []
    # initialize font size "large enough"
    font_size = 1000

    # start drawing grey image
    for word, count in zip(words, counts):
        # alternative way to set the font size
        if not ranks_only:
            font_size = min(font_size, int(100 * np.log(count + 100)))

        result = None
        # Only ever load the font at a positive size; a size of 0 means the
        # word cannot be placed at all.
        while font_size > 0:
            font = ImageFont.truetype(font_path, font_size)
            # transpose font optionally
            orientation = random.choice([None, Image.ROTATE_90])
            transposed_font = ImageFont.TransposedFont(
                font, orientation=orientation)
            # Pass the font explicitly instead of relying on the legacy
            # ``draw.setfont`` API, which newer Pillow releases lack.
            box_size = _measure_text(draw, word, transposed_font)
            # find a possible place using the integral image
            result = query_integral_image(integral, box_size[1] + margin,
                                          box_size[0] + margin)
            if result is not None:
                break
            # if we didn't find space make the font smaller
            font_size -= 1

        if result is None:
            # No room left even at the smallest size: stop placing words.
            break

        # ``result`` is used as (row, column); shift by half the margin so
        # the text sits inside the reserved box.
        x, y = np.array(result) + margin // 2
        # actually draw the text (PIL expects (column, row))
        draw.text((y, x), word, fill="white", font=transposed_font)
        positions.append((x, y))
        orientations.append(orientation)
        font_sizes.append(font_size)

        # Recompute the integral image, but only for the region below and to
        # the right of the new text, since nothing else changed.
        img_array = np.asarray(img_grey)
        # the order of the cumsum's is important for speed ?!
        partial_integral = np.cumsum(np.cumsum(img_array[x:, y:], axis=1),
                                     axis=0)
        # Add back the contribution of the rows above and columns to the left.
        if x > 0:
            if y > 0:
                partial_integral += (integral[x - 1, y:]
                                     - integral[x - 1, y - 1])
            else:
                partial_integral += integral[x - 1, y:]
        if y > 0:
            partial_integral += integral[x:, y - 1][:, np.newaxis]
        integral[x:, y:] = partial_integral

    # redraw in color, reusing the sizes/positions/orientations found above;
    # zip() stops at the last word that was actually placed
    img = Image.new("RGB", (width, height))
    draw = ImageDraw.Draw(img)
    placed = zip(words, font_sizes, positions, orientations)
    for word, font_size, position, orientation in placed:
        font = ImageFont.truetype(font_path, font_size)
        transposed_font = ImageFont.TransposedFont(
            font, orientation=orientation)
        draw.text((position[1], position[0]), word, font=transposed_font,
                  fill="hsl(%d" % random.randint(0, 255) + ", 80%, 50%)")
    img.save(fname)
def init_cloud(keywords):
    """Render ``wordcloud.png`` from an iterable of keyword records.

    Each record is indexed positionally: item 0 is the word, and items 1 and
    2 are added together to form that word's weight.  The records are read in
    a single pass, so one-shot iterables such as generators are supported.

    Nothing is returned; the image is written by ``make_wordcloud``.
    """
    words, counts = [], []
    for keyword in keywords:
        words.append(keyword[0])
        counts.append(keyword[1] + keyword[2])
    make_wordcloud(np.array(words), np.array(counts), "wordcloud.png")