KJMAN678 committed on
Commit
a5d77d0
·
1 Parent(s): acfb1a5

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -0
app.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from janome.tokenizer import Tokenizer
3
+ from janome.analyzer import Analyzer
4
+ from janome.tokenfilter import *
5
+
6
+ import matplotlib.pyplot as plt
7
+ import japanize_matplotlib
8
+ import numpy as np
9
+ import pandas as pd
10
+ import collections
11
+ import itertools
12
+ from collections import Counter
13
+ import networkx as nx
14
+ from wordcloud import WordCloud
15
+
16
# Streamlit app: the user pastes Japanese text; nouns and verbs are extracted
# with janome and shown as a co-occurrence network graph plus a word cloud.

# Values of the first part-of-speech field that are kept for analysis.
TARGET_POS = ('名詞', '動詞')
# Number of most frequent co-occurring pairs drawn in the network graph.
TOP_PAIR_COUNT = 50


def _extract_words(text, stop_words=()):
    """Return the surface forms of nouns and verbs in *text*, in order.

    Args:
        text: Raw text to tokenize.
        stop_words: Surface forms to drop (empty by default).

    Returns:
        A list of token surface strings.
    """
    tokenizer = Tokenizer()
    return [
        token.surface
        for token in tokenizer.tokenize(text)
        if token.part_of_speech.split(',')[0] in TARGET_POS
        and token.surface not in stop_words
    ]


def _top_cooccurrences(words, limit=TOP_PAIR_COUNT):
    """Return the *limit* most frequent word pairs as (word_a, word_b, count).

    Only words of two or more characters are paired. Pairs are counted as
    unordered, because they are loaded into an undirected graph: counting
    (a, b) and (b, a) separately would let one edge weight overwrite the
    other. Pairs of a word with itself are skipped so that no self-loops
    are drawn.
    """
    candidates = [word for word in words if len(word) >= 2]
    # Feed the pairs to Counter lazily instead of building the full
    # O(n^2) list of combinations in memory first.
    pair_counts = Counter(
        tuple(sorted(pair))
        for pair in itertools.combinations(candidates, 2)
        if pair[0] != pair[1]
    )
    return [
        (first, second, count)
        for (first, second), count in pair_counts.most_common(limit)
    ]


text = st.text_area(label='テキストを貼り付けてください', value='')

if text:
    # Tokenize and keep only nouns and verbs.
    noun_list = _extract_words(text)

    if not noun_list:
        # WordCloud.generate() raises ValueError when it has no words to
        # draw, so report the situation instead of crashing the app.
        st.warning('名詞・動詞が見つかりませんでした。')
    else:
        # Build the word cloud from the space-separated words.
        wordcloud = WordCloud(
            background_color='whitesmoke',  # background colour
            font_path="font/ipaexg.ttf",  # path to the downloaded font
            width=500,
            height=500,
        )
        wordcloud.generate(' '.join(noun_list))

        # Build the undirected co-occurrence graph from the weighted edges.
        G = nx.Graph()
        G.add_weighted_edges_from(_top_cooccurrences(noun_list))

        # Draw the network graph (top) and the word cloud (bottom).
        fig = plt.figure(figsize=(12, 20))
        plt.rcParams["font.size"] = 18

        plt.subplot(2, 1, 1)
        plt.title("ネットワーク図")
        nx.draw_networkx(
            G,
            node_shape="s",
            node_color="c",
            node_size=200,
            edge_color="gray",
            font_family="IPAexGothic",  # font used for the node labels
        )

        plt.subplot(2, 1, 2)
        plt.title("ワードクラウド")
        plt.axis('off')
        plt.imshow(wordcloud)

        plt.tight_layout()

        st.pyplot(fig)
        # Streamlit re-runs the script on every interaction; close the
        # figure so open figures do not accumulate in memory.
        plt.close(fig)