import os
from itertools import groupby

import openpyxl as ox
import pandas as pd


class ExcelUtil:
    """Load one sheet of an Excel workbook and reshape its rows into plain Python structures.

    All ``init_*`` methods read the sheet selected at construction time via
    :meth:`read_excel_by_ox` and address cells by fixed, zero-based column
    positions, so they are tied to the layout of the workbook they were
    written for.
    """

    # Resource directory: two levels above this file, then ``elab_mvp/resources``.
    dir_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + r'/elab_mvp/resources'

    # Crowd label -> zero-based column index of that crowd's flag cell.
    _CROWD_COLUMNS = (('A', 6), ('B', 7), ('C', 8), ('D', 9), ('E', 10), ('F', 11))

    def __init__(self, sheet_name=None, file_name=None):
        """Remember which workbook and sheet to read.

        :param sheet_name: sheet to open; falls back to ``'硬标签+行为'`` when falsy.
        :param file_name: file name inside ``dir_path``; falls back to
            ``'mvp.xlsx'`` when falsy.
        """
        self.path = os.path.join(self.dir_path, file_name or 'mvp.xlsx')
        self.sheet_name = sheet_name or '硬标签+行为'

    def read_excel_by_pd(self):
        """Read the workbook with pandas and print the head of the resulting frame."""
        df = pd.read_excel(self.path)
        data = df.head()
        print('获取到的数据{}'.format(data))

    def read_excel_by_ox(self):
        """Open the workbook with openpyxl and return the configured worksheet.

        The workbook is loaded with ``data_only=True``, so formula cells yield
        their cached values.

        :return: the worksheet named ``self.sheet_name``.
        :raises KeyError: if the workbook has no sheet with that name.
        """
        work_book = ox.load_workbook(self.path, data_only=True)
        # Index access replaces the deprecated ``get_sheet_by_name``.
        return work_book[self.sheet_name]

    def init_crowd_info(self):
        """Collect, per crowd, the distinct option values flagged for it.

        The first two rows are skipped. For every remaining row the option is
        taken from column 4 and the flags for crowds A-F from columns 6-11; an
        option is added to a crowd when that crowd's flag equals ``1`` and the
        option is not already listed for the same crowd.

        :return: ``{'A': [...], 'B': [...], ..., 'F': [...]}`` with options in
            first-seen order.
        """
        rows = list(self.read_excel_by_ox().rows)
        crowds = {label: [] for label, _ in self._CROWD_COLUMNS}
        for row in rows[2:]:
            option = row[4].value
            for label, column in self._CROWD_COLUMNS:
                members = crowds[label]
                # Each crowd is de-duplicated against its own list (crowd C
                # was previously checked against crowd D by mistake).
                if row[column].value == 1 and option not in members:
                    members.append(option)
        return crowds

    def init_mvp_data(self):
        """Group option references by tag type and tag name.

        The first 24 rows are skipped. Column 0 holds the tag type, column 1
        the tag name, column 3 the value and column 4 the accompanying ``corr``
        cell. Empty tag type / tag name cells inherit the last non-empty value
        seen above them (presumably merged cells -- confirm against the
        workbook). Rows whose value is ``None`` or ``'找不到'`` are dropped.

        :return: ``{tag_type: {tag_name: [[value, corr], ...]}}``.
        """
        rows = list(self.read_excel_by_ox().rows)[24:]
        tag_name = None
        tag_type = None
        datas = []
        for row in rows:
            if row[0].value:
                tag_type = row[0].value
            if row[1].value:
                tag_name = row[1].value
            values = row[3].value
            if values is not None and values != '找不到':
                datas.append([tag_type, tag_name, values, row[4].value])

        result = {}
        # groupby only merges adjacent items, hence the (stable) sorts.
        datas.sort(key=lambda item: item[0])
        for type_key, type_items in groupby(datas, key=lambda item: item[0]):
            by_name = sorted(type_items, key=lambda item: item[1])
            result[type_key] = {
                name: [[item[2], item[3]] for item in name_items]
                for name, name_items in groupby(by_name, key=lambda item: item[1])
            }
        return result

    def init_scores(self):
        """Return the first five cell values of every data row.

        The first row is skipped, as are rows whose first cell is ``None``.

        :return: list of ``[col0, col1, col2, col3, col4]`` value lists.
        """
        rows = list(self.read_excel_by_ox().rows)
        return [
            [cell.value for cell in row[:5]]
            for row in rows[1:]
            if row[0].value is not None
        ]

    def init_module_info(self):
        """Group behaviour/score/weight triples by crowd and module.

        The first three rows are skipped. Column 1 holds the crowd name (empty
        cells inherit the last non-``None`` value above), column 2 the
        behaviour and column 4 the score. From column 6 onwards the cells are
        read as (module name, weight) pairs; pairs without a module name are
        ignored.

        :return: ``{crowd: {module_name: [[behavior, score, weight], ...]}}``.
        """
        work_sheet = self.read_excel_by_ox()
        max_column = work_sheet.max_column
        rows = list(work_sheet.rows)[3:]
        crowd_name = None
        datas = []
        for row in rows:
            if row[1].value is not None:
                crowd_name = row[1].value
            behavior = row[2].value
            score = row[4].value
            # Stop one short of the last column so ``index + 1`` stays in range.
            for index in range(6, max_column - 1, 2):
                module_name = row[index].value
                if module_name is not None:
                    datas.append([crowd_name, behavior, score, module_name, row[index + 1].value])

        results = {}
        # groupby only merges adjacent items, hence the (stable) sorts.
        datas.sort(key=lambda item: item[0])
        for crowd, crowd_items in groupby(datas, key=lambda item: item[0]):
            by_module = sorted(crowd_items, key=lambda item: item[3])
            results[crowd] = {
                module: [[item[1], item[2], item[4]] for item in module_items]
                for module, module_items in groupby(by_module, key=lambda item: item[3])
            }
        return results