﻿# -*- coding: UTF-8 -*-

import re
import xlrd
import traceback

class Excel(object):
    """Reader for ``.xls`` workbooks, backed by ``xlrd``."""

    def __init__(self, filename, logger):
        """
        Store the workbook path and the logger used for error reporting.

        :param filename: path of the workbook to read
        :param logger: object whose ``error(message)`` method receives
                       failure reports
        """
        self.logger = logger
        self.filename = filename

    def getCellInfosOneColumn(self, sheet_name, start_row=0, columnIndex=0):
        """
        Collect the non-empty cells of one column of a sheet.

        Only ``.xls`` files are handled; reading ``.xlsx`` would need
        another module such as openpyxl.

        :param sheet_name: name of the sheet to read
        :param start_row: zero-based index of the first row to read
        :param columnIndex: zero-based index of the column to read
        :return: ``(ok, cellDescList)``. ``ok`` is False when the file name
                 does not end with ".xls" or when reading fails; in the
                 latter case the error is logged and ``cellDescList`` holds
                 whatever was collected before the failure.
        """
        cellDescList = []
        try:
            if not self.filename.endswith(".xls"):
                return False, cellDescList
            data = xlrd.open_workbook(self.filename)
            # The sheet stays on the instance, as it always has, in case
            # other code reads ``self.table`` after this call.
            self.table = data.sheet_by_name(sheet_name)
            skipped_types = (xlrd.XL_CELL_EMPTY, xlrd.XL_CELL_BLANK)
            for rownum in range(start_row, self.table.nrows):
                cell = self.table.cell(rownum, columnIndex)
                # Decide emptiness from the cell type, not by searching the
                # cell's repr for the word "empty" -- that also dropped real
                # text that merely contained "empty".
                if cell.ctype in skipped_types:
                    continue
                if cell.ctype == xlrd.XL_CELL_TEXT:
                    # Use the stored text directly so apostrophes and
                    # non-ASCII characters survive. A trailing space in the
                    # sheet may be read back as a no-break space (\xa0), so
                    # normalise it to a plain space before stripping.
                    cellDesc = cell.value.replace(u'\xa0', u' ').strip()
                else:
                    # NOTE(review): non-text cells keep the previous
                    # "type:value" rendering (e.g. "number:1.0"); confirm
                    # whether callers would prefer the plain value.
                    cellDesc = repr(cell).strip()
                cellDescList.append(cellDesc)
        except Exception:
            self.logger.error("[ExcelParse] getCellInfosOneColumn exception: " + traceback.format_exc())
            return False, cellDescList

        return True, cellDescList
