# tongce.py
from itertools import groupby

from excel_util import ExcelUtil
from mysql_db import MysqlDB
  4. class TongCe:
  5. """
  6. 同策测试数据清洗
  7. """
  8. # 统计筒体结果
  9. sql_1 = '''
  10. SELECT
  11. a.sub_question_id,
  12. b.sub_question_content,
  13. a.score,
  14. b.sub_option_content,
  15. count(1)
  16. FROM
  17. f_t_daren_score_2 a
  18. LEFT JOIN d_shangju_tiku_02 b ON a. STATUS = b. STATUS = 1
  19. WHERE
  20. a.testcase_id in %s and
  21. a.testcase_id = b.testcase_id
  22. AND a.sub_question_id = b.sub_question_id
  23. AND (
  24. a.score = b.score
  25. OR a.score = b.sub_option_id
  26. )
  27. GROUP BY
  28. b.sub_question_content,
  29. a.score,
  30. b.sub_option_content
  31. '''
  32. # 选项信息
  33. sql_2 = '''
  34. SELECT
  35. b.id as question_id,
  36. b. NAME as question_title,
  37. a.id as sub_question_id,
  38. a. NAME as sub_question_title,
  39. d.id as option_id,
  40. d.content as option_title,
  41. c.id as sub_option_id,
  42. c.content as sub_option_title
  43. FROM
  44. bq_sub_question a
  45. LEFT JOIN bq_question b ON a.father_id = b.id
  46. LEFT JOIN bq_sub_option c ON a.id = c.sub_question_id
  47. LEFT JOIN bq_option d ON c.father_id = d.id
  48. WHERE
  49. FIND_IN_SET(
  50. a.id,
  51. (
  52. SELECT
  53. GROUP_CONCAT(question_ids)
  54. FROM
  55. bq_testcase
  56. WHERE
  57. house_ids = %s
  58. GROUP BY
  59. house_ids
  60. )
  61. )
  62. AND a. STATUS = b. STATUS = c. STATUS = 1
  63. ORDER BY
  64. a.id
  65. '''
    # Insert one row into mvp_page_display_match. Takes 12 parameters
    # (house_id .. data_item_name); STATUS, creator and created are fixed
    # to 1, 'binren' and now().
    sql_3 = '''
    INSERT INTO mvp_page_display_match (
    house_id,
    question_id,
    question_title,
    sub_question_id,
    sub_question_title,
    option_id,
    option_content,
    sub_option_id,
    sub_option_content,
    data_item_tab,
    data_item_title,
    data_item_name,
    STATUS,
    creator,
    created
    )
    VALUES
    (
    %s,
    %s,
    %s,
    %s,
    %s,
    %s,
    %s,
    %s,
    %s,
    %s,
    %s,
    %s,
    1,
    'binren',
    now()
    )
    '''
    # Active rows of mvp_page_display_match as
    # (id, sub_question_id, sub_option_id, data_item_title).
    sql_4 = '''
    SELECT
    id,
    sub_question_id,
    sub_option_id,
    data_item_title
    FROM
    mvp_page_display_match
    WHERE
    STATUS = 1
    '''
    # Ids of active test cases whose house_ids list contains the id of the
    # active bq_house row named by the %s parameter.
    # NOTE(review): the scalar subquery presumably expects house names to be
    # unique among active rows - confirm.
    sql_5 = '''
    SELECT
    id
    FROM
    bq_testcase
    WHERE
    STATUS = 1
    AND FIND_IN_SET(
    (
    SELECT
    id
    FROM
    bq_house
    WHERE
    STATUS = 1
    AND NAME = %s
    ),
    house_ids
    )
    '''
    # Insert one row into mvp_page_display_data. Takes 5 parameters:
    # crowd_info_id, match_id, page_display_rule_id, name, value.
    sql_6 = '''
    insert INTO mvp_page_display_data (
    crowd_info_id,
    match_id,
    page_display_rule_id,
    name,
    value,
    STATUS,
    creator,
    created
    )
    VALUES
    (%s, %s, %s, %s, %s, 1, 'binren', now())
    '''
    # Per (testcase_id, uuid): the distinct sub-option contents answered
    # under father_id 47, 48, 234 or 254, for test cases 84-87.
    # NOTE(review): not referenced by any method visible in this file.
    sql_7 = '''
    SELECT
    a.testcase_id,
    a.uuid,
    GROUP_CONCAT(
    DISTINCT b.sub_option_content
    )
    FROM
    f_t_daren_score_2 a
    LEFT JOIN d_shangju_tiku_02 b ON a.score = b.sub_option_id
    WHERE
    a.testcase_id IN (84, 85, 86, 87)
    AND b.father_id IN (47, 48, 234, 254)
    and a.sub_question_id = b.sub_question_id and a.testcase_id = b.testcase_id
    GROUP BY
    a.testcase_id,
    a.uuid
    '''
    # Every answer of test cases 84-87 joined with the question bank row of
    # the chosen sub-option.
    # NOTE(review): not referenced by any method visible in this file.
    sql_8 = '''
    SELECT
    a.uuid,
    a.title,
    a.testcase_id,
    b.father_id,
    b.father_content,
    b.sub_option_id,
    b.sub_option_content
    FROM
    f_t_daren_score_2 a
    LEFT JOIN d_shangju_tiku_02 b ON a.score = b.sub_option_id
    WHERE
    a.testcase_id = b.testcase_id
    AND a.sub_question_id = b.sub_question_id
    AND a.testcase_id IN (84, 85, 86, 87)
    '''
    # Answers of test cases 84-87, one row per (respondent, answered
    # sub-option), prefixed with the respondent's profile.
    # Result columns by index:
    #   0 city, 1 uuid, 2 sex, 3 nld, 4 zhifuli, 5 juzhujiegou,
    #   6 father_content, 7 father_id, 8 sub_question_id,
    #   9 sub_question_content, 10 option_id (option_tags),
    #   11 option_content (father_content), 12 sub_option_id,
    #   13 sub_option_content, 14 testcase_id, 15 title.
    # The profile columns are the sub-option contents the respondent chose
    # under father_id 47 (sex), 48 (nld), 234 (zhifuli), 254 (city) and
    # 211 (juzhujiegou); the five derived tables are chained by uuid.
    sql_9 = '''
    SELECT
    x.city
    ,x.uuid
    ,x.sex
    ,x.nld
    ,x.zhifuli
    ,x.juzhujiegou
    ,m.father_content
    ,m.father_id
    ,m.sub_question_id
    ,m.sub_question_content
    ,m.option_tags as option_id
    ,m.father_content as option_content
    ,m.sub_option_id
    ,m.sub_option_content
    ,m.testcase_id
    ,m.title
    FROM
    (
    SELECT
    e.uuid,
    e.sex,
    f.nld,
    c.zhifuli,
    d.city,
    w.juzhujiegou
    FROM
    (
    SELECT
    a.testcase_id,
    a.uuid,
    b.sub_option_content AS sex
    FROM
    f_t_daren_score_2 a
    LEFT JOIN d_shangju_tiku_02 b ON a.score = b.sub_option_id
    WHERE
    a.testcase_id IN (84, 85, 86, 87)
    AND b.father_id = 47
    AND a.sub_question_id = b.sub_question_id
    AND a.testcase_id = b.testcase_id
    GROUP BY
    a.testcase_id,
    a.uuid
    ) e
    LEFT JOIN (
    SELECT
    a.uuid,
    b.sub_option_content AS nld
    FROM
    f_t_daren_score_2 a
    LEFT JOIN d_shangju_tiku_02 b ON a.score = b.sub_option_id
    WHERE
    a.testcase_id IN (84, 85, 86, 87)
    AND b.father_id = 48
    AND a.sub_question_id = b.sub_question_id
    AND a.testcase_id = b.testcase_id
    GROUP BY
    a.testcase_id,
    a.uuid
    ) f ON e.uuid = f.uuid
    LEFT JOIN (
    SELECT
    a.uuid,
    b.sub_option_content AS zhifuli
    FROM
    f_t_daren_score_2 a
    LEFT JOIN d_shangju_tiku_02 b ON a.score = b.sub_option_id
    WHERE
    a.testcase_id IN (84, 85, 86, 87)
    AND b.father_id = 234
    AND a.sub_question_id = b.sub_question_id
    AND a.testcase_id = b.testcase_id
    GROUP BY
    a.testcase_id,
    a.uuid
    ) c ON f.uuid = c.uuid
    LEFT JOIN (
    SELECT
    a.uuid,
    b.sub_option_content AS city
    FROM
    f_t_daren_score_2 a
    LEFT JOIN d_shangju_tiku_02 b ON a.score = b.sub_option_id
    WHERE
    a.testcase_id IN (84, 85, 86, 87)
    AND b.father_id = 254
    AND a.sub_question_id = b.sub_question_id
    AND a.testcase_id = b.testcase_id
    GROUP BY
    a.testcase_id,
    a.uuid
    ) d ON c.uuid = d.uuid
    left join (
    SELECT
    a.uuid,
    b.sub_option_content AS juzhujiegou
    FROM
    f_t_daren_score_2 a
    LEFT JOIN d_shangju_tiku_02 b ON a.score = b.sub_option_id
    WHERE
    a.testcase_id IN (84, 85, 86, 87)
    AND b.father_id = 211
    AND a.sub_question_id = b.sub_question_id
    AND a.testcase_id = b.testcase_id
    GROUP BY
    a.testcase_id,
    a.uuid
    ) w on d.uuid = w.uuid
    ) x
    LEFT JOIN (
    SELECT
    a.uuid,
    a.title,
    a.testcase_id,
    b.father_id,
    b.father_content,
    b.sub_question_id,
    b.sub_question_content,
    b.sub_option_id,
    b.sub_option_content,
    b.option_tags
    FROM
    f_t_daren_score_2 a
    LEFT JOIN d_shangju_tiku_02 b ON a.score = b.sub_option_id
    WHERE
    a.testcase_id = b.testcase_id
    AND a.sub_question_id = b.sub_question_id
    AND a.testcase_id IN (84, 85, 86, 87)
    ) m ON x.uuid = m.uuid
    '''
    # Insert one answer row for test case 84 with a fixed title.
    # Parameters: uuid, score, created, sub_question_id.
    sql_10 = '''
    INSERT INTO f_t_daren_score_2 (
    testcase_id,
    title,
    uuid, score, created, sub_question_id
    )
    VALUE
    (84, '有钱人的生活就是很枯燥的……', %s, %s, %s, %s)
    '''
    # Active display rules as (id, title_type, title_in_page, sub_question_id).
    sql_11 = '''
    select id, title_type, title_in_page, sub_question_id from mvp_page_display_rule where status = 1
    '''
    # Insert one display rule for house 67 / function 1.
    # Parameters: title_type, title_in_page, sub_question_id.
    sql_12 = '''
    INSERT INTO mvp_page_display_rule (
    house_id,
    function_id,
    title_type,
    title_in_page,
    sub_question_id,
    STATUS,
    creator,
    created
    )
    VALUE
    (
    67,
    1,
    %s,
    %s,
    %s,
    1,
    'binren',
    now()
    )
    '''
    # Insert one crowd segment for house 67.
    # Parameters: pay_ability, age_area, city_name, life_cycle.
    sql_13 = '''
    INSERT INTO mvp_crowd_info (
    house_id,
    pay_ability,
    age_area,
    city_name,
    life_cycle,
    STATUS,
    creator,
    created
    )
    VALUES
    (
    67,
    %s,
    %s,
    %s,
    %s,
    1,
    'binren',
    now()
    )
    '''
  373. def __init__(self):
  374. self.shangju_db = MysqlDB('shangju')
  375. self.marketing_db = MysqlDB('bi_report')
  376. self.linshi_db = MysqlDB('linshi', db_type=1)
  377. self.options_info = ExcelUtil('工作表6', 'tongce.xlsx').read_options_info()
  378. self.table_type_info = ExcelUtil('新增项目数据项类型排序与展示图表类型管理表', 'table_type.xlsx').get_table_type_info()
  379. def get_question_info_from_db(self):
  380. result = self.shangju_db.select(self.sql_2, [67])
  381. insert_data = []
  382. for rt in result:
  383. rt = list(rt)
  384. option_configuration = self.options_info.get('67' + str(rt[6]))
  385. if option_configuration and len(option_configuration) == 4:
  386. rt.insert(0, 67)
  387. rt.extend(option_configuration[0:3])
  388. insert_data.append(rt)
  389. return insert_data
  390. def get_option_match_info(self):
  391. result = self.linshi_db.select(self.sql_4)
  392. return result
    # Sub-question ids used with this query:
    # pay ability 376, age 29, city 377, living structure 395.
    # Returns the content of every sub-option of the given sub-question.
    sql_14 = '''
    select content from bq_sub_option where sub_question_id = %s
    '''
  397. def insert_into_mvp_crowd_info(self):
  398. zhifuli = self.shangju_db.select(self.sql_14, [376])
  399. age = self.shangju_db.select(self.sql_14, [29])
  400. city = self.shangju_db.select(self.sql_14, [377])
  401. juzhujiegou = self.shangju_db.select(self.sql_14, [395])
  402. insert_data = []
  403. for zfl in zhifuli:
  404. for a in age:
  405. for cy in city:
  406. for jzjg in juzhujiegou:
  407. insert_data.append([zfl, a, cy, jzjg])
  408. if len(insert_data) > 0:
  409. # self.linshi_db.truncate('mvp_crowd_info')
  410. self.linshi_db.add_some(self.sql_13, insert_data)
    # Active crowd segments of house 67 as
    # (id, pay_ability, age_area, city_name, life_cycle).
    sql_15 = '''
    select id, pay_ability, age_area, city_name, life_cycle from mvp_crowd_info where status = 1 and house_id = 67
    '''
  414. def get_crowd_info(self):
  415. data = self.linshi_db.select(self.sql_15)
  416. return data
  417. def insert_into_rule(self):
  418. option_info = self.options_info
  419. insert_data = []
  420. sub_question_ids = []
  421. for key in option_info.keys():
  422. data = option_info[key]
  423. if data[3] not in sub_question_ids:
  424. insert_data.append([data[0], data[1], data[3]])
  425. sub_question_ids.append(data[3])
  426. if len(insert_data) > 0:
  427. self.linshi_db.truncate('mvp_page_display_rule')
  428. self.linshi_db.add_some(self.sql_12, insert_data)
  429. def get_rule_data_info(self):
  430. data = self.linshi_db.select(self.sql_11)
  431. return data
    # Three-parameter variant of the mvp_page_display_data insert:
    # crowd_info_id, match_id, value.
    # NOTE(review): not referenced by any method visible in this file.
    sql_16 = '''
    insert INTO mvp_page_display_data (
    crowd_info_id,
    match_id,
    value,
    STATUS,
    creator,
    created
    )
    VALUES
    (%s, %s, %s, 1, 'binren', now())
    '''
  444. def lingdi_data_scores(self):
  445. # 1: 写入mvp_crowd_info
  446. # self.insert_into_mvp_crowd_info()
  447. crowd_info = self.get_crowd_info()
  448. # 2: 写入rule
  449. # self.insert_into_rule()
  450. rule = self.get_rule_data_info()
  451. # 3: 读入答题数据
  452. self.answers = self.marketing_db.select(self.sql_9)
  453. # 4: 写入match信息
  454. match_data = self.get_question_info_from_db()
  455. # self.linshi_db.truncate('mvp_page_display_match')
  456. # self.linshi_db.add_some(self.sql_3, match_data)
  457. self.match_data_info = self.get_option_match_info()
  458. # 筛选写入data的数据
  459. insert_data = []
  460. no_data_case = []
  461. for ci in crowd_info:
  462. crowd_info_id = ci[0]
  463. zhifuli = ci[1]
  464. age = ci[2]
  465. city = ci[3]
  466. juzhujiegou = ci[4]
  467. data = self.filter_people(city, age, zhifuli, juzhujiegou)
  468. data.sort(key=lambda obj: obj[0])
  469. for key, questions_data in groupby(data, key=lambda obj: obj[0]):
  470. question_data_list = []
  471. for qd in questions_data:
  472. question_data_list.append([x for x in qd])
  473. rule_id = self.get_rule_id(key, rule)
  474. if rule_id is not None:
  475. question_people = len(question_data_list)
  476. if question_people > 0:
  477. question_data_list.sort(key=lambda obj: obj[3])
  478. for option_name, option_data_1 in groupby(question_data_list, key=lambda obj: obj[3]):
  479. option_data_list = []
  480. for od in option_data_1:
  481. option_data_list.append([x for x in od])
  482. if len(option_data_list) >= 0:
  483. option_id = option_data_list[0][2]
  484. for md in self.match_data_info:
  485. if str(md[1]) == str(key) and str(md[2]) == str(option_id):
  486. match_id = md[0]
  487. option_name_alias = md[3]
  488. insert_data.append([crowd_info_id, match_id, rule_id, option_name_alias, len(option_data_list) / question_people])
  489. else:
  490. no_data_case.append([zhifuli, city, age, juzhujiegou, option_name])
  491. else:
  492. no_data_case.append([zhifuli, city, age, juzhujiegou, key])
  493. quanliang_scores = self.scores()
  494. for q_s in quanliang_scores:
  495. rule_id = self.get_rule_id(q_s[0], rule)
  496. if rule_id:
  497. for md in self.match_data_info:
  498. if str(md[1]) == str(q_s[0]) and str(md[1]) == str(q_s[1]):
  499. match_id = md[0]
  500. option_name_alias = md[3]
  501. insert_data.append([5405, match_id, rule_id, option_name_alias, q_s[2]])
  502. if len(insert_data) > 0:
  503. self.linshi_db.truncate('mvp_page_display_data')
  504. self.linshi_db.add_some(self.sql_6, insert_data)
  505. return {'写入库中的数据': len(insert_data), '无数据': len(no_data_case)}
    # Set the chart type and size of the display rule(s) with the given
    # page title. Parameters: display_type, display_size, title_in_page.
    sql_20 = '''
    UPDATE mvp_page_display_rule
    SET display_type = %s,
    display_size = %s
    WHERE
    title_in_page = %s
    '''
  513. def table_type_insert(self):
  514. self.shangju_db.update(self.sql_20, self.table_type_info)
  515. def get_rule_id(self, sub_question_id, rule):
  516. for re in rule:
  517. if str(re[3]) == str(sub_question_id):
  518. return re[0]
  519. def filter_people(self, city, age, zhifuli, juzhujiegou):
  520. result = []
  521. for answer in self.answers:
  522. if answer[0] == city and answer[3] == age and answer[4] == zhifuli and answer[5] == juzhujiegou:
  523. # 子题id, 子题题目,子选项id,子选项题目
  524. result.append([answer[8], answer[9], answer[10], answer[11]])
  525. return result
  526. def get_testcase_ids_by_house_name(self, house_name):
  527. testcase_ids = self.shangju_db.select(self.sql_5, [house_name])
  528. return testcase_ids
  529. def scores(self):
  530. testcase_ids = self.get_testcase_ids_by_house_name('同策 领地')
  531. db_data = self.marketing_db.select(self.sql_1, [testcase_ids])
  532. answer = []
  533. for data in db_data:
  534. answer.append([data[0], data[2], data[4]])
  535. answer.sort(key=lambda obj: obj[0])
  536. sub_option_score = []
  537. for sub_question_id, others in groupby(answer, key=lambda obj: obj[0]):
  538. others_data = []
  539. for ot in others:
  540. others_data.append([x for x in ot])
  541. sub_question_count = sum([x[2] for x in others_data])
  542. for td in others_data:
  543. sub_option_id = td[1]
  544. sub_option_count = td[2]
  545. rate = int(sub_option_count) / sub_question_count
  546. sub_option_score.append([sub_question_id, sub_option_id, rate])
  547. return sub_option_score
  548. def tongce(self):
  549. """
  550. tongce测试数据清洗
  551. :return:
  552. """
  553. match_data = self.get_question_info_from_db()
  554. # self.linshi_db.truncate('mvp_page_display_match')
  555. self.linshi_db.add_some(self.sql_3, match_data)
  556. scores = self.scores()
  557. match_data_info = self.get_option_match_info()
  558. dispaly_data = []
  559. for score in scores:
  560. sub_question_id = score[0]
  561. sub_option_id = score[1]
  562. value = score[2]
  563. for mi in match_data_info:
  564. if str(mi[1]) == str(sub_question_id) and str(mi[2]) == str(sub_option_id):
  565. dispaly_data.append([mi[0], value])
  566. if len(dispaly_data) > 0:
  567. self.linshi_db.truncate('mvp_page_display_data')
  568. self.linshi_db.add_some(self.sql_6, dispaly_data)
  569. return {'插入数据条数': len(dispaly_data), 'scores': dispaly_data}
  570. def wenjuanxin_84(self):
  571. excel = ExcelUtil('Sheet1', '84_1500.xlsx')
  572. insert_data = excel.wenjuanxin_84()
  573. self.linshi_db.add_some(self.sql_10, insert_data)
  574. print()
    # Answers of test cases 84-87 to sub-question 377 with score 2917 whose
    # city, province or district names one of the listed places.
    # Result columns by index: 0 id, 1 uuid, 2 created, 3 status,
    # 4 sub_question_id, 5 testcase_id, 6 title, 7 score, 8 province,
    # 9 city, 10 district.
    # Note that the city and province lists use '璧山' while the district
    # list uses '璧山区'.
    sql_17 = '''
    SELECT
    id,
    uuid,
    created,
    `status`,
    sub_question_id,
    testcase_id,
    title,
    score,
    province,
    city,
    district
    FROM
    f_t_daren_score_2
    WHERE
    testcase_id IN (84, 85, 86, 87)
    AND sub_question_id = 377
    AND score = 2917
    AND (
    city IN (
    '昆明市',
    '西安市',
    '咸阳市',
    '郑州市',
    '洛阳市',
    '武汉市',
    '襄阳市',
    '重庆市',
    '璧山'
    )
    OR province IN (
    '昆明市',
    '西安市',
    '咸阳市',
    '郑州市',
    '洛阳市',
    '武汉市',
    '襄阳市',
    '重庆市',
    '璧山'
    )
    OR district IN (
    '昆明市',
    '西安市',
    '咸阳市',
    '郑州市',
    '洛阳市',
    '武汉市',
    '襄阳市',
    '重庆市',
    '璧山区'
    )
    )
    '''
    # Overwrite the score of one answer row. Parameters: score, id.
    sql_18 = '''
    update f_t_daren_score_2 set score = %s where id = %s
    '''
  633. city_info = {
  634. '昆明市': 2918,
  635. '西安市': 2919,
  636. '咸阳市': 2920,
  637. '郑州市': 2921,
  638. '洛阳市': 2922,
  639. '武汉市': 2923,
  640. '襄阳市': 2924,
  641. '重庆市': 2925,
  642. '璧山市': 2926
  643. }
  644. def other_city_clean(self):
  645. update_data = []
  646. need_update_data = self.marketing_db.select(self.sql_17)
  647. for nd in need_update_data:
  648. id = nd[0]
  649. province = nd[8]
  650. city = nd[9]
  651. district = nd[10]
  652. bishan = self.city_info.get(district)
  653. if bishan:
  654. update_data.append([bishan, id])
  655. else:
  656. city_id = self.city_info.get(city)
  657. if city_id:
  658. update_data.append([city_id, id])
  659. else:
  660. province_id = self.city_info.get(province)
  661. if province_id:
  662. update_data.append([province_id, id])
  663. self.marketing_db.add_some(self.sql_18, update_data)
  664. return len(update_data)
    # Two ad-hoc statements kept for reference: list the ids of the
    # test case 84-87 answers scored 2925 whose district is '璧山区', then
    # set a fixed list of ids to 2926.
    # NOTE(review): this holds two statements without a separator and is not
    # referenced by any method visible in this file - presumably it was run
    # by hand.
    sql_19 = '''
    select GROUP_CONCAT(id)from f_t_daren_score_2 where testcase_id in (84, 85, 86, 87) and score = 2925 and district = '璧山区'
    update f_t_daren_score_2 set score = 2926 where id in (979728,979890,981251,984783,985250,985564,990999)
    '''
    def chongqin_to_bishan(self):
        # Placeholder: not implemented, does nothing.
        pass
# Script entry point. Constructing TongCe opens the database handles and
# reads both Excel files; insert_into_rule() truncates
# mvp_page_display_rule before refilling it whenever it has rows to write.
if __name__ == '__main__':
    tongce = TongCe()
    tongce.insert_into_rule()