/*************************************************************************
 *
 *  $RCSfile: red.cxx,v $
 *
 *  $Revision: 1.7 $
 *
 *  last change: $Author: lla $ $Date: 2001/11/23 14:01:33 $
 *
 *  The Contents of this file are made available subject to the terms of
 *  either of the following licenses
 *
 *         - GNU Lesser General Public License Version 2.1
 *         - Sun Industry Standards Source License Version 1.1
 *
 *  Sun Microsystems Inc., October, 2000
 *
 *  GNU Lesser General Public License Version 2.1
 *  =============================================
 *  Copyright 2000 by Sun Microsystems, Inc.
 *  901 San Antonio Road, Palo Alto, CA 94303, USA
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License version 2.1, as published by the Free Software Foundation.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 *  MA  02111-1307  USA
 *
 *
 *  Sun Industry Standards Source License Version 1.1
 *  =================================================
 *  The contents of this file are subject to the Sun Industry Standards
 *  Source License Version 1.1 (the "License"); You may not use this file
 *  except in compliance with the License. You may obtain a copy of the
 *  License at http://www.openoffice.org/license.html.
 *
 *  Software provided under this License is provided on an "AS IS" basis,
 *  WITHOUT WARRUNTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING,
 *  WITHOUT LIMITATION, WARRUNTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
 *  MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
 *  See the License for the specific provisions governing your rights and
 *  obligations concerning the Software.
 *
 *  The Initial Developer of the Original Code is: Sun Microsystems, Inc..
 *
 *  Copyright: 2000 by Sun Microsystems, Inc.
 *
 *  All Rights Reserved.
 *
 *  Contributor(s): _______________________________________
 *
 *
 ************************************************************************/
#ifdef UNX
#include <unistd.h>
#endif

#include <stdio.h>

#ifndef _SV_SVAPP_HXX
#include <vcl/svapp.hxx>
#endif

#ifndef _FSYS_HXX
#include <tools/fsys.hxx>
#endif

#ifndef _FSYS_HXX
#include <tools/fsys.hxx>
#endif

#include "rtl/crc.h"
#include "setup2/sifsys.hxx"
#include "setup2/hashtbl.hxx"

#include "svunzip.h"

// Size in bytes of the stack buffer that createCRC() fills with each fread().
#define MAX_BUF_SIZE		32000

// Command line state, filled in by main():
// root directory to scan, taken from the argument following -s
ByteString	aSourcePath;
// set by -t: duplicates are reported but no file is killed or symlinked
BOOL		bTestOnlyMode = FALSE;
// set by -v: print progress dots, the collected file table and every link
BOOL		bVerboseMode  = FALSE;

// ============================================================================

// Writes the command line help for this tool to stderr, one option per line.
void PrintRedundancyEliminatorUsage()
{
	static const char* const pUsageLines[] =
	{
		"usage:\n",
		"\t-s <source path root>\n",
		"\t-t <test only mode>\n",
		"\t-v verbose\n"
	};
	const unsigned nLineCount = sizeof( pUsageLines ) / sizeof( pUsageLines[0] );

	for( unsigned nLine = 0; nLine < nLineCount; ++nLine )
		fputs( pUsageLines[ nLine ], stderr );
}

// ============================================================================

// Application subclass whose Main() does nothing; the tool's work is done in
// the free function main() of this file.
// NOTE(review): presumably the global instance exists only so that the
// Application framework is set up for the helper classes used here - confirm.
class RedundancyEliminator : public Application
{
public:
	// intentionally empty, see class comment
	virtual void Main() {}
};

// the one and only application object
RedundancyEliminator aApp;

// ============================================================================

// Checksum type used to compare files; values are produced with rtl_crc32().
typedef sal_uInt32 CRCSum;
// -----------------------------------------------------------------------------
// Bookkeeping record for one candidate file collected by ReadSource().
struct FileEntry
{
	// checksum over the unpacked content, as returned by decompressInTempFolder()
	CRCSum     nCRC;
	// size of the file itself as reported by FileStat::GetSize()
	sal_uInt32 nSize;
	// path of the file as returned by GetFull() on its directory entry
	ByteString aAbsFilename;
	// set to TRUE by Eliminate() once this file was chosen to be replaced by a link
	BOOL bReplaced;
};

// NOTE(review): DECLARE_LIST is a project macro not visible here; presumably it
// declares the list class Filelist holding FileEntry* elements - confirm.
DECLARE_LIST( Filelist, FileEntry* );

// All candidate files, in the order ReadSource() found them.
Filelist aFilelist;

// ============================================================================
// Continues the CRC32 _nCRC over the complete content of the given file.
//
// _aFullPathedFile  path handed to fopen(), the file is read in binary mode
// _nCRC             start value, lets callers chain several files/names
//
// Returns the updated checksum. If the file cannot be opened the start value
// is returned unchanged.
sal_uInt32 createCRC(ByteString const _aFullPathedFile, sal_uInt32 _nCRC = 0)
{
	FILE* pStream = fopen( _aFullPathedFile.GetBuffer(), "rb" );
	if( !pStream )
		return _nCRC;

	sal_uInt32 nResult = _nCRC;
	char aChunk[MAX_BUF_SIZE];

	// feed the file chunk by chunk into the checksum until nothing is left
	for( ;; )
	{
		int nRead = fread( aChunk, sizeof(char), MAX_BUF_SIZE, pStream );
		if( nRead <= 0 )
			break;
		nResult = rtl_crc32( nResult, aChunk, nRead );
	}

	fclose( pStream );
	return nResult;
}
// ============================================================================

ULONG   nTotalBytesWritten = 0;
long    nLastBytesWritten = 0;
ULONG   nTotalSize = 0;

void __UnzipCallback(long _lBytesWritten)
{
    if ( _lBytesWritten <= nLastBytesWritten )
        nTotalBytesWritten += nLastBytesWritten;
	
    nLastBytesWritten = _lBytesWritten;
    
    long nPercent = ( ( nTotalBytesWritten + _lBytesWritten ) / ( nTotalSize / 100 ) );
	
}

// ============================================================================

// Returns the directory to use for temporary files.
//
// The environment variables TEMP and TMP are examined in this order; the first
// one that is set and names an existing directory entry wins. The result is
// cached in a function-local static, so the environment is only consulted
// until a directory has been found.
//
// If neither variable yields an existing directory, an error message and the
// usage text are written to stderr and the process exits with -1.
ByteString getSystemTempDir()
{
	static ByteString sTmpDir;

	if (sTmpDir.Len() == 0)
	{
		static const char* const pEnvNames[] = { "TEMP", "TMP" };
		const unsigned nEnvCount = sizeof( pEnvNames ) / sizeof( pEnvNames[0] );

		BOOL bFound = FALSE;
		for ( unsigned i = 0; i < nEnvCount && !bFound; ++i )
		{
			const char* cTmpDir = getenv( pEnvNames[i] );
			if ( cTmpDir == NULL )
				continue;           // variable not set, try the next one

			ByteString sCandidate( cTmpDir );
			SiDirEntry aTmpDir;
			aTmpDir = sCandidate;
			if ( aTmpDir.Exists() )
			{
				sTmpDir = sCandidate;
				bFound = TRUE;
			}
		}

		// only give up when BOTH variables failed; a valid TMP must be accepted
		if ( !bFound )
		{
			fprintf( stderr, "Error: No tmp directory found, please set TMP or TEMP env variable.\n" );
			PrintRedundancyEliminatorUsage();
			exit( -1 );
		}
	}
	return sTmpDir;
}


// -----------------------------------------------------------------------------
// Builds one checksum over a complete directory tree.
//
// _aDirectory  directory to walk; entries are visited sorted ascending by name
// _nStartCRC   checksum to continue, used for the recursion into subdirectories
//
// For every entry except "." and ".." the lower-cased name is fed into the
// checksum first, then either the subdirectory (recursively) or the content of
// the file. Names are part of the sum on purpose: identical content under
// different names inside an archive must NOT be treated as equal.
CRCSum createCRCFromDir(DirEntry const& _aDirectory, CRCSum _nStartCRC = 0)
{
	CRCSum nSum = _nStartCRC;
	Dir aListing( _aDirectory, FSYS_KIND_FILE | FSYS_KIND_DIR, FSYS_SORT_ASCENDING | FSYS_SORT_NAME );

	for( USHORT nPos = 0; nPos < aListing.Count(); ++nPos )
	{
		SiDirEntry aEntry( aListing[nPos] );
		ByteString aName = aEntry.GetName();

		const BOOL bSelfOrParent =
			aName.CompareIgnoreCaseToAscii(".")  == COMPARE_EQUAL ||
			aName.CompareIgnoreCaseToAscii("..") == COMPARE_EQUAL;
		if( bSelfOrParent )
			continue;

		FileStat aStat( aEntry );

		// the name always goes into the sum, see function comment above
		aName.ToLowerAscii();
		nSum = rtl_crc32(nSum, aName.GetBuffer(), aName.Len());

		if( aStat.GetKind() == FSYS_KIND_DIR )
		{
			nSum = createCRCFromDir( aEntry.GetFull(), nSum );
		}
		else
		{
			ByteString aFullname = aEntry.GetFull();
			nSum = createCRC(aFullname, nSum);

			// TODO / nice to have :
			// zip in zip
		}
	}
	return nSum;
}

// -----------------------------------------------------------------------------
// Unpacks an archive into a scratch directory and returns the checksum of the
// unpacked tree.
//
// _sFilename         name of the archive, used as last path component of the
//                    scratch directory
// aFullPathWithName  path of the archive handed to SVUnzip()
//
// The scratch directory is <tmp>/red/[<pid>/]<_sFilename> (the pid component
// only exists in UNX builds). It is removed before use if it already exists,
// becomes the current working directory for unpacking, and is removed again
// after the checksum was built.
CRCSum decompressInTempFolder(ByteString const& _sFilename, ByteString const& aFullPathWithName)
{
	ByteString sTmpRoot(getSystemTempDir());

	DirEntry aWorkDir(sTmpRoot);
	aWorkDir += ByteString("red");
#ifdef UNX
	aWorkDir += ByteString::CreateFromInt32(getpid());
#endif
	aWorkDir += _sFilename;

	// start from a clean directory, leftovers would falsify the checksum
	if (aWorkDir.Exists())
		aWorkDir.Kill(FSYS_ACTION_RECURSIVE);

	aWorkDir.MakeDir();
	aWorkDir.SetCWD();
	::SVUnzip( aFullPathWithName.GetBuffer(), "*.*", (const char*)"qq", NULL /* (UnzipCallBack*) __UnzipCallback */ );

	const CRCSum nTreeCRC = createCRCFromDir(aWorkDir);

	// tidy up; the existence check is pure paranoia
	if (aWorkDir.Exists())
		aWorkDir.Kill(FSYS_ACTION_RECURSIVE);

	return nTreeCRC;
}

// -----------------------------------------------------------------------------
static int nCount = 0;
void ReadSource( const ByteString& rPath )
{
	Dir aDir( rPath, FSYS_KIND_FILE | FSYS_KIND_DIR, FSYS_SORT_ASCENDING | FSYS_SORT_NAME );
	for( USHORT i = 0; i < aDir.Count(); ++i )
	{
		SiDirEntry aEntry( aDir[i] );

		ByteString aName = aEntry.GetName();
		if( aName.CompareIgnoreCaseToAscii(".")  == COMPARE_EQUAL ||
			aName.CompareIgnoreCaseToAscii("..") == COMPARE_EQUAL )
			continue;

		FileStat aStat( aEntry );
		if( aStat.GetKind() == FSYS_KIND_DIR )
		{
			ReadSource( aEntry.GetFull() );
		}
		else
		{
			if( aName.CompareIgnoreCaseToAscii("f0_", 3) != COMPARE_EQUAL &&
				aName.CompareIgnoreCaseToAscii("f_", 2)  != COMPARE_EQUAL )
				continue;

			if ( bVerboseMode )
			{
				fprintf( stderr, "." );
				if ((++ nCount) >= 80)
				{
					nCount = 0;
					fprintf(stderr, "\n");
				}
			}

			// we only want to check f0_ and f_ files
			ByteString aFullname = aEntry.GetFull();

			CRCSum nCRC = decompressInTempFolder(aName, aFullname);

			// sal_uInt32 nCRC = createCRC(aFullname);
			
			FileEntry* pNew = new FileEntry;
			pNew->nCRC = nCRC;
			pNew->nSize = aStat.GetSize();
			pNew->aAbsFilename = aEntry.GetFull();
			pNew->bReplaced = FALSE;

			aFilelist.Insert( pNew, LIST_APPEND );
		}
	}
}

// ============================================================================

// Dumps the collected file table to stdout, one line per entry:
// checksum (hex, width 8), file size in bytes and file name, tab separated.
//
// Both numbers are unsigned 32 bit values. They are widened to unsigned long
// and printed with matching conversions, so that the output is correct no
// matter which builtin type sal_uInt32 maps to and sizes above 2 GB do not
// show up as negative numbers.
void verboseFileEntry()
{
	for( USHORT n = 0; n < aFilelist.Count(); ++n )
	{
		FileEntry* pEntry = aFilelist.GetObject(n);

		printf("%8lx\t%lu\t%s\n",
			   (unsigned long) pEntry->nCRC,
			   (unsigned long) pEntry->nSize,
			   pEntry->aAbsFilename.GetBuffer());
	}
}

// Replaces duplicate files in aFilelist by symbolic links.
//
// Two entries are duplicates when checksum and size are equal and both are
// non-zero. The entry that comes first in the list is kept, every later
// duplicate is marked bReplaced. In UNX builds, unless test-only mode is
// active, the duplicate is then deleted and a symlink to the kept file is
// created in its place. In verbose mode every replacement is logged to stderr.
//
// _nrCount  incremented once per duplicate found
//
// Returns the sum of the sizes of all duplicates found.
//
// A failing symlink() is reported on stderr; the entry still counts as
// replaced because the duplicate has already been deleted at that point.
ULONG Eliminate( USHORT& _nrCount )
{
	ULONG nBytes = 0;
	for( USHORT n = 0; n < aFilelist.Count(); ++n )
	{
		FileEntry* pOrigEntry = aFilelist.GetObject(n); // do not delete or link this file

		// Entries without checksum or content never match anything. Entries
		// that were replaced themselves need no pass of their own: all their
		// duplicates were already handled in the pass of the file they link to.
		if( pOrigEntry->bReplaced ||
			pOrigEntry->nCRC == 0 ||
			pOrigEntry->nSize == 0 )
		{
			continue;
		}

		// start at position n + 1, so we never compare an entry with itself
		for( USHORT x = n + 1; x < aFilelist.Count(); ++x )
		{
			FileEntry* pCouldLink = aFilelist.GetObject(x);

			if( pCouldLink->bReplaced ||
				pCouldLink == pOrigEntry  )				 // paranoid, check to our self
			{
				continue;
			}

			if( ( pOrigEntry->nCRC  != pCouldLink->nCRC ) ||
				( pOrigEntry->nSize != pCouldLink->nSize ) )
			{
				continue;
			}

			// checksum and size are equal, so we link pCouldLink to pOrigEntry
			++_nrCount;

			pCouldLink->bReplaced = TRUE;
			nBytes += pCouldLink->nSize;
#ifdef UNX
			if( !bTestOnlyMode )
			{
				SiDirEntry aEntry( pCouldLink->aAbsFilename );
				aEntry.Kill();
				if( symlink( pOrigEntry->aAbsFilename.GetBuffer(), pCouldLink->aAbsFilename.GetBuffer() ) != 0 )
				{
					// perror first, a later stdio call could overwrite errno
					perror( pCouldLink->aAbsFilename.GetBuffer() );
					fprintf( stderr, "error: could not create link to %s\n", pOrigEntry->aAbsFilename.GetBuffer() );
				}
			}
#endif
			if( bVerboseMode )
			{
				fprintf( stderr, "%s   ==>   %s\n\n", pCouldLink->aAbsFilename.GetBuffer(), pOrigEntry->aAbsFilename.GetBuffer() );
			}
		}
	}

	return nBytes;
}

// ============================================================================

// Program entry point.
//
// ATTENTION: the parameter names are swapped compared to the usual convention:
//   argv  is the NUMBER of command line arguments
//   argc  is the argument VECTOR
//
// Options:
//   -s <path>  root of the directory tree to scan (required)
//   -t         test only, do not modify any file
//   -v         verbose output
// Arguments that are not one of these options are ignored.
//
// Exits with -1 when no arguments are given, when -s has no value or when no
// source path was specified; returns 0 otherwise.
int __LOADONCALLAPI main( int argv, char** argc )
{
	fprintf( stderr, "\nRedundancy Eliminator (c) 2000,2001 Sun Microsystems\n\n" );
	if( argv == 1 )
	{
		PrintRedundancyEliminatorUsage();
		exit( -1 );
	}

	// === evaluate parameter & environment ===================================
	for( USHORT n = 1; n < argv; n++ )
	{
		if( argc[ n ][ 0 ] == '-' )
		{
			switch( argc[ n ][ 1 ] )
			{
				case 's':	// the path is the NEXT argument, make sure there is one
							if( n + 1 >= argv )
							{
								fprintf( stderr, "error: option -s requires a path\n" );
								PrintRedundancyEliminatorUsage();
								exit( -1 );
							}
							aSourcePath = argc[ ++n ];
							break;
				case 't':	bTestOnlyMode = TRUE;
							break;
				case 'v':	bVerboseMode = TRUE;
							break;
				default:	// unknown options are silently ignored
							break;
			}
		}
	}

	if( !aSourcePath.Len() )
	{
		fprintf( stderr, "error: no source path root defined\n" );
		PrintRedundancyEliminatorUsage();
		exit( -1 );
	}

	fprintf( stderr, "start file scan, create file checksums\n" );

	ReadSource( aSourcePath );

	// counts are unsigned, print them with a matching conversion
	fprintf( stderr, "%lu files found\nstart eliminating redundances\n", (unsigned long) aFilelist.Count() );

	if (bVerboseMode)
	{
		verboseFileEntry();
	}

	USHORT nCnt = 0;
	ULONG nBytes = Eliminate( nCnt );

	fprintf( stderr, "eliminates %lu MB in %d files\n", (unsigned long) ((nBytes/1024)/1024), (int) nCnt );

	return 0;
}

