Line 0
Link Here
|
|
|
1 |
--- data/db/android/create_db.py.orig 2020-10-11 20:12:08 UTC |
2 |
+++ data/db/android/create_db.py |
3 |
@@ -3,27 +3,27 @@ from pydict import * |
4 |
from id import * |
5 |
from valid_hanzi import * |
6 |
import sys |
7 |
+from functools import cmp_to_key |
8 |
|
9 |
def get_sheng_yun(pinyin): |
10 |
if pinyin == None: |
11 |
return None, None |
12 |
if pinyin == "ng": |
13 |
return "", "en" |
14 |
- for i in xrange(2, 0, -1): |
15 |
+ for i in range(2, 0, -1): |
16 |
t = pinyin[:i] |
17 |
if t in SHENGMU_DICT: |
18 |
return t, pinyin[len(t):] |
19 |
return "", pinyin |
20 |
|
21 |
def read_phrases(filename): |
22 |
- buf = file(filename).read() |
23 |
- buf = unicode(buf, "utf16") |
24 |
+ buf = open(filename, encoding='utf-16').read() |
25 |
buf = buf.strip() |
26 |
- for l in buf.split(u'\n'): |
27 |
- hanzi, freq, flag, pinyin = l.split(u' ', 3) |
28 |
+ for l in buf.split('\n'): |
29 |
+ hanzi, freq, flag, pinyin = l.split(' ', 3) |
30 |
freq = float(freq) |
31 |
pinyin = pinyin.split() |
32 |
- if any(map(lambda c: c not in valid_hanzi, hanzi)): |
33 |
+ if any([c not in valid_hanzi for c in hanzi]): |
34 |
continue |
35 |
yield hanzi, freq, pinyin |
36 |
|
37 |
@@ -33,9 +33,9 @@ def create_db(filename): |
38 |
# con.execute ("PRAGMA synchronous = NORMAL;") |
39 |
# con.execute ("PRAGMA temp_store = MEMORY;") |
40 |
# con.execute ("PRAGMA default_cache_size = 5000;") |
41 |
- print "PRAGMA synchronous = NORMAL;" |
42 |
- print "PRAGMA temp_store = MEMORY;" |
43 |
- print "PRAGMA default_cache_size = 5000;" |
44 |
+ print("PRAGMA synchronous = NORMAL;") |
45 |
+ print("PRAGMA temp_store = MEMORY;") |
46 |
+ print("PRAGMA default_cache_size = 5000;") |
47 |
|
48 |
|
49 |
sql = "CREATE TABLE py_phrase_%d (phrase TEXT, freq INTEGER, %s);" |
50 |
@@ -44,12 +44,12 @@ def create_db(filename): |
51 |
for j in range(0, i + 1): |
52 |
column.append ("s%d INTEGER" % j) |
53 |
column.append ("y%d INTEGER" % j) |
54 |
- print sql % (i, ",".join(column)) |
55 |
+ print(sql % (i, ",".join(column))) |
56 |
# con.execute(sql % (i, column)) |
57 |
# con.commit() |
58 |
|
59 |
records = list(read_phrases(filename)) |
60 |
- records.sort(lambda a, b: 1 if a[1] > b[1] else -1) |
61 |
+ records.sort(key=cmp_to_key(lambda a, b: 1 if a[1] > b[1] else -1)) |
62 |
records_new = [] |
63 |
i = 0 |
64 |
max_freq = 0.0 |
65 |
@@ -60,7 +60,7 @@ def create_db(filename): |
66 |
records_new.append((hanzi, i, pinyin)) |
67 |
records_new.reverse() |
68 |
|
69 |
- print "BEGIN;" |
70 |
+ print("BEGIN;") |
71 |
insert_sql = "INSERT INTO py_phrase_%d VALUES (%s);" |
72 |
for hanzi, freq, pinyin in records_new: |
73 |
columns = [] |
74 |
@@ -69,12 +69,12 @@ def create_db(filename): |
75 |
s, y = pinyin_id[s], pinyin_id[y] |
76 |
columns.append(s) |
77 |
columns.append(y) |
78 |
- values = "'%s', %d, %s" % (hanzi.encode("utf8"), freq, ",".join(map(str,columns))) |
79 |
+ values = "'%s', %d, %s" % (hanzi, freq, ",".join(map(str,columns))) |
80 |
|
81 |
sql = insert_sql % (len(hanzi) - 1, values) |
82 |
- print sql |
83 |
- print "COMMIT;" |
84 |
- print "VACUUM;" |
85 |
+ print(sql) |
86 |
+ print("COMMIT;") |
87 |
+ print("VACUUM;") |
88 |
|
89 |
def main(): |
90 |
create_db(sys.argv[1]) |