/* ****************************************************************************
 * Copyright 2011 VMware, Inc.  All rights reserved. -- VMware Confidential
 * ****************************************************************************/
package com.vmware.vide.vlogbrowser.core.utils;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collection;
import java.util.Enumeration;
import java.util.Scanner;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;

import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.tools.bzip2.CBZip2InputStream;
import org.apache.tools.tar.TarEntry;
import org.apache.tools.tar.TarInputStream;
import org.apache.tools.zip.ZipEntry;
import org.apache.tools.zip.ZipFile;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.vmware.vide.vlogbrowser.core.fileops.CommandData;
import com.vmware.vide.vlogbrowser.core.fileops.LogFileManager;
import com.vmware.vide.vlogbrowser.core.fileops.LogFileUtils;

/**
 * Utilities to deal with archive files: format detection by magic number
 * (zip, tar, gzip-tar, bzip2-tar) and searching archive entries by file
 * name pattern (zip, gzip-tar).
 *
 * TODO: These utilities should be broken down into
 * instances of an IArchive interface, so that additional
 * archive formats and operations can be added easily.
 */
public class ArchiveUtils {

    private static final byte[] PAX_USTAR_MAGIC = { 0x75, 0x73, 0x74, 0x61, 0x72, 0x00 };
    private static final int PAX_USTAR_MAGIC_OFFSET = 257;
    private static final byte[] GNU_TAR_MAGIC = { 0x75, 0x73, 0x74, 0x61, 0x72, 0x20, 0x20, 0x00 };
    private static final int GNU_TAR_MAGIC_OFFSET = 257;
    private static final int TAR_MIN_HEADER_SIZE = 512;
    private static final byte[] BZIP2_MAGIC1 = { 0x42, 0x5A };
    private static final int BZIP2_MAGIC1_OFFSET = 0;
    private static final byte[] BZIP2_MAGIC2 = { 0x31, 0x41, 0x59, 0x26, 0x53, 0x59 };
    private static final int BZIP2_MAGIC2_OFFSET = 4;
    private static final int BZIP2_MIN_HEADER_SIZE = 20;
    private static final byte[] GZIP_MAGIC = { 0x1f, (byte) 0x8b };
    private static final int GZIP_MAGIC_OFFSET = 0;
    private static final int GZIP_MIN_HEADER_SIZE = 10;
    private static final byte[] ZIP_MAGIC = { 0x50, 0x4b, 0x03, 0x04 };
    private static final int ZIP_MAGIC_OFFSET = 0;
    private static final int ZIP_MIN_HEADER_SIZE = 30;
    private static final String READ_IO_ERROR_MSG = "I/O Error on reading the archive file [%s].";

    /**
     * Matches a listing line that contains a standalone "0" followed by a
     * capitalized three-letter word. Presumably this is a zero size column
     * followed by a month abbreviation in "tar -tvf" output; the listing
     * format is platform dependent, so verify before relying on it.
     */
    private static final Pattern EMPTY_TAR_LISTING_ENTRY = Pattern.compile("\\s+0\\s+[A-Z][a-z]{2}");

    private static final Logger logger = LoggerFactory.getLogger(ArchiveUtils.class);

    /**
     * Check if the path points to a supported archive file.
     * Note that simple compression does not an archive make!
     * Only files that contain a collection of other files are archives.
     *
     * @param path location on the local system of a file (e.g. .tgz, .zip etc.)
     * @return true if the path is an archive format we support (zip, tar,
     * gzip-tar or bzip2-tar), false otherwise or on I/O error.
     */
    public static boolean isArchive(String path) {
        File f = new File(path);
        return isZipFile(f) || isTarFile(f) || isGZipTarFile(f) || isBZip2TarFile(f);
    }

    /**
     * Search the file name patterns in the entries of the zip file. An
     * additional size check can be considered, since there are empty files in
     * the bundle that can mislead for some actions.
     *
     * Each entry name is converted with {@code new File(name).getAbsolutePath()}
     * before matching, so the patterns are matched against (and the returned
     * value is) a path made absolute against the current working directory.
     *
     * @param archiveFile The zip archive file.
     * @param patterns The file patterns; each must match the whole path.
     * @param checkSize Whether should consider size != 0 in the search. When set
     * and the first matching entry has size 0, the search stops and null is returned.
     * @return The path of the first found file pattern. null if no file pattern was found.
     * @throws IOException IO error.
     */
    public static String searchPatternsInZipEntries(File archiveFile,
            Collection<Pattern> patterns, boolean checkSize) throws IOException {
        ZipFile zFile = new ZipFile(archiveFile);
        try {
            for (Enumeration<?> e = zFile.getEntries(); e.hasMoreElements();) {
                ZipEntry ze = (ZipEntry) e.nextElement();
                String path = new File(ze.getName()).getAbsolutePath();
                if (matchesAny(patterns, path)) {
                    return (checkSize && ze.getSize() == 0) ? null : path;
                }
            }
        } finally {
            zFile.close();
        }
        return null;
    }

    /**
     * Search the file name patterns in the entries of a gzipped tar file using
     * the Ant tar/gzip streams.
     *
     * @param archiveFile The gzipped tar archive file.
     * @param patterns The file patterns; each must match the whole entry name.
     * @param checkSize Whether should consider size != 0 in the search.
     * @return The path of the first found file pattern. null if no file pattern was found.
     * @throws IOException IO error.
     * @deprecated Use {@link #searchFileNamePatternsInTarGzEntries(File, Collection, boolean)} instead.
     */
    @Deprecated
    public static String searchFileNamePatternsInGzipTarEntries(
            File archiveFile, Collection<Pattern> patterns, boolean checkSize) throws IOException {
        FileInputStream fis = new FileInputStream(archiveFile);
        TarInputStream tis = null;
        try {
            tis = new TarInputStream(new GZIPInputStream(fis));
            TarEntry te;
            while ((te = tis.getNextEntry()) != null) {
                String path = te.getName();
                if (matchesAny(patterns, path)) {
                    return (checkSize && te.getSize() == 0) ? null : path;
                }
            }
        } finally {
            if (tis != null) {
                tis.close();
            } else {
                /* The wrapping stream was never created; release the raw file handle. */
                closeQuietly(fis);
            }
        }
        return null;
    }

    /**
     * Search the file name patterns in the entries of the gzipped tar file. An
     * additional size check can be considered, since there are empty files in
     * the bundle that can mislead for some actions.
     *
     * @param archiveFile The gzipped tar archive file.
     * @param patterns The file patterns; each must match the whole entry name.
     * @param checkSize Whether should consider size != 0 in the search. When set
     * and the first matching entry has size 0, the search stops and null is returned.
     * @return The path of the first found file pattern. null if no file pattern was found.
     * @throws IOException IO error.
     */
    public static String searchFileNamePatternsInTarGzEntries(File archiveFile,
            Collection<Pattern> patterns, boolean checkSize) throws IOException {
        FileInputStream fis = new FileInputStream(archiveFile);
        TarArchiveInputStream tis = null;
        try {
            tis = new TarArchiveInputStream(new GzipCompressorInputStream(fis));
            TarArchiveEntry te;
            while ((te = tis.getNextTarEntry()) != null) {
                String path = te.getName();
                if (matchesAny(patterns, path)) {
                    return (checkSize && te.getSize() == 0) ? null : path;
                }
            }
        } finally {
            if (tis != null) {
                tis.close();
            } else {
                /* The wrapping stream was never created; release the raw file handle. */
                closeQuietly(fis);
            }
        }
        return null;
    }

    /**
     * Search the file name patterns in the output of a local "tar -tvf" shell
     * command run on the archive. Unlike the other search methods, patterns are
     * applied with {@code find()} to the whole listing line, and the whole
     * listing line is what gets returned.
     *
     * NOTE(review): the archive path is concatenated into a shell command line
     * after LogFileUtils.escapeSpaces only; confirm that paths containing other
     * shell metacharacters cannot reach this method.
     *
     * @param archiveFile The gzipped tar archive file.
     * @param patterns The file patterns, searched for within each listing line.
     * @param checkSize Whether should consider size != 0 in the search.
     * @return The first listing line containing a file pattern. null if no file
     * pattern was found, or if checkSize is set and that line looks like an empty entry.
     * @throws Exception error on running the command or reading its output.
     * @deprecated Use {@link #searchFileNamePatternsInTarGzEntries(File, Collection, boolean)} instead.
     */
    @Deprecated
    public static String searchFileNamePatternsInTgzEntries(File archiveFile,
            Collection<Pattern> patterns, boolean checkSize) throws Exception {
        String commandParam = LogFileManager.SHELL_TAR_CMD + " -tvf "
                + LogFileUtils.escapeSpaces(archiveFile.getAbsolutePath());

        String[] localCommand = new String[] { LogFileManager.SHELL_EXEC,
                LogFileManager.SHELL_CMD_SWITCH, commandParam };

        CommandData data = LogFileUtils.runLocalCommand(localCommand,
                LogFileUtils.getParentDir(archiveFile.getAbsolutePath()));

        Scanner sc = new Scanner(data.getOutput());
        try {
            while (sc.hasNextLine()) {
                String line = sc.nextLine();
                for (Pattern p : patterns) {
                    if (p.matcher(line).find()) {
                        if (checkSize && EMPTY_TAR_LISTING_ENTRY.matcher(line).find()) {
                            return null;
                        }
                        return line;
                    }
                }
            }
        } finally {
            sc.close();
        }
        return null;
    }

    /**
     * Search the file name patterns in the entries of either zip or tar gzip files. An
     * additional size check can be considered, since there are empty files in
     * the bundle that can mislead for some actions.
     *
     * @param archiveFile The zip or gzipped tar archive file.
     * @param patterns The file patterns.
     * @param checkSize Whether should consider size != 0 in the search.
     * @return The path of the first found file pattern. null if no file pattern was found
     * or the file is neither a zip nor a gzipped tar file.
     * @throws IOException IO error.
     */
    public static String searchFileNamePatterns(File archiveFile,
            Collection<Pattern> patterns, boolean checkSize) throws IOException {
        if (isZipFile(archiveFile)) {
            return searchPatternsInZipEntries(archiveFile, patterns, checkSize);
        }
        if (isGZipTarFile(archiveFile)) {
            return searchFileNamePatternsInTarGzEntries(archiveFile, patterns, checkSize);
        }
        return null;
    }

    /**
     * quick check whether the file is a zip file or not.
     * @param f The file.
     * @return true if the file is a zip file. false otherwise or IO error.
     */
    public static boolean isZipFile(File f) {
        byte[] header = readHeader(f, ZIP_MIN_HEADER_SIZE);
        return header != null && compareBytes(header, ZIP_MAGIC_OFFSET, ZIP_MAGIC);
    }

    /**
     * quick check whether the file is a tar file or not.
     * @param f The file.
     * @return true if the file is a tar file. false otherwise or IO error.
     */
    public static boolean isTarFile(File f) {
        byte[] header = readHeader(f, TAR_MIN_HEADER_SIZE);
        return header != null && hasTarMagic(header);
    }

    /**
     * quick check whether the file is a gzip file or not.
     * @param f The file.
     * @return true if the file is a gzip file. false otherwise or IO error.
     */
    public static boolean isGZipFile(File f) {
        byte[] header = readHeader(f, GZIP_MIN_HEADER_SIZE);
        return header != null && compareBytes(header, GZIP_MAGIC_OFFSET, GZIP_MAGIC);
    }

    /**
     * quick check whether the file is a bzip2 file or not.
     * @param f The file.
     * @return true if the file is a bzip2 file. false otherwise or IO error.
     */
    public static boolean isBZip2File(File f) {
        byte[] header = readHeader(f, BZIP2_MIN_HEADER_SIZE);
        return header != null
                && compareBytes(header, BZIP2_MAGIC1_OFFSET, BZIP2_MAGIC1)
                && compareBytes(header, BZIP2_MAGIC2_OFFSET, BZIP2_MAGIC2);
    }

    /**
     * quick check whether the file is a gzipped tar file or not.
     * @param f The file.
     * @return true if the file is a gzipped tar file, false otherwise or IO error.
     */
    public static boolean isGZipTarFile(File f) {
        if (!isGZipFile(f)) {
            return false;
        }
        FileInputStream fis = null;
        InputStream gzis = null;
        try {
            fis = new FileInputStream(f);
            gzis = new GZIPInputStream(fis);
            byte[] header = new byte[TAR_MIN_HEADER_SIZE];
            /* A single read() on an inflating stream may return a partial buffer. */
            if (readFully(gzis, header) < TAR_MIN_HEADER_SIZE) {
                return false;
            }
            return hasTarMagic(header);
        } catch (IOException e) {
            logger.error(String.format(READ_IO_ERROR_MSG, f), e);
            return false;
        } finally {
            /* Closing the wrapper closes the file; fall back to the raw stream
             * when the wrapper could not be constructed. */
            closeQuietly(gzis != null ? gzis : fis);
        }
    }

    /**
     * quick check whether the file is a bzip2ed tar file or not.
     * @param f The file.
     * @return true if the file is a bzip2ed tar file, false otherwise or IO error.
     */
    public static boolean isBZip2TarFile(File f) {
        if (!isBZip2File(f)) {
            return false;
        }
        FileInputStream fis = null;
        InputStream bz2is = null;
        try {
            fis = new FileInputStream(f);
            /* CBZip2InputStream requires the stream starting at the third byte,
             * so skip the first 2 bytes. */
            if (readFully(fis, new byte[BZIP2_MAGIC1.length]) < BZIP2_MAGIC1.length) {
                return false;
            }
            bz2is = new CBZip2InputStream(fis);
            byte[] header = new byte[TAR_MIN_HEADER_SIZE];
            if (readFully(bz2is, header) < TAR_MIN_HEADER_SIZE) {
                return false;
            }
            return hasTarMagic(header);
        } catch (IOException e) {
            logger.error(String.format(READ_IO_ERROR_MSG, f), e);
            return false;
        } finally {
            closeQuietly(bz2is != null ? bz2is : fis);
        }
    }

    /**
     * Check whether any of the patterns matches the whole path.
     * @param patterns The file patterns.
     * @param path The path to test.
     * @return true if at least one pattern matches the entire path.
     */
    private static boolean matchesAny(Collection<Pattern> patterns, String path) {
        for (Pattern p : patterns) {
            if (p.matcher(path).matches()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Check whether the buffer carries the GNU tar or the PAX/ustar magic.
     * @param header The first bytes of the (uncompressed) tar stream.
     * @return true if either tar magic is found at its offset.
     */
    private static boolean hasTarMagic(byte[] header) {
        return compareBytes(header, GNU_TAR_MAGIC_OFFSET, GNU_TAR_MAGIC)
                || compareBytes(header, PAX_USTAR_MAGIC_OFFSET, PAX_USTAR_MAGIC);
    }

    /**
     * Read the first bytes of the file, logging instead of propagating I/O errors.
     * @param f The file.
     * @param num The number of bytes.
     * @return byte array filled with read bytes, null if the specified number of bytes
     * were not read or on I/O error.
     */
    private static byte[] readHeader(File f, int num) {
        try {
            return readBytes(f, num);
        } catch (IOException e) {
            logger.error(String.format(READ_IO_ERROR_MSG, f), e);
            return null;
        }
    }

    /**
     * Read number of bytes from the beginning of the file.
     * @param f The file.
     * @param num The number of bytes.
     * @return byte array filled with read bytes, null if the specified number of bytes were
     * not read.
     * @throws IOException I/O error.
     */
    private static byte[] readBytes(File f, int num) throws IOException {
        FileInputStream fis = new FileInputStream(f);
        try {
            byte[] b = new byte[num];
            return readFully(fis, b) < num ? null : b;
        } finally {
            fis.close();
        }
    }

    /**
     * Read from the stream until the buffer is full or the stream ends. A single
     * read() call is allowed to return fewer bytes than requested even when more
     * data follows, so the call is repeated.
     * @param in The stream to read from.
     * @param buf The buffer to fill.
     * @return the number of bytes actually stored in the buffer.
     * @throws IOException I/O error.
     */
    private static int readFully(InputStream in, byte[] buf) throws IOException {
        int total = 0;
        while (total < buf.length) {
            int n = in.read(buf, total, buf.length - total);
            if (n < 0) {
                break;
            }
            total += n;
        }
        return total;
    }

    /**
     * Close the stream, logging (not propagating) a failure, so that a close
     * error cannot override the result of the operation that used the stream.
     * @param in The stream to close, may be null.
     */
    private static void closeQuietly(InputStream in) {
        if (in == null) {
            return;
        }
        try {
            in.close();
        } catch (IOException e) {
            logger.debug("Failed to close stream", e);
        }
    }

    /**
     * Compare the byte contents with the byte pattern.
     * @param buff The byte contents.
     * @param offset The offset where starts at to compare in the byte contents.
     * @param pattern The byte pattern.
     * @return true if the pattern matches with the contents, false otherwise
     * (including when the contents are too short to hold the pattern at the offset).
     */
    private static boolean compareBytes(byte[] buff, int offset, byte[] pattern) {
        for (int i = 0; i < pattern.length; i++) {
            int pos = offset + i;
            if (pos >= buff.length || buff[pos] != pattern[i]) {
                return false;
            }
        }
        return true;
    }
}
