CStdioFile扩展(支持Ansi、Unicode、Utf-8等文本格式)

2023-11-15

头文件声明(CStdioFileEx.h):

// StdioFileEx.h: interface for the CStdioFileEx class.
//
// Version 1.1 23 August 2003.	Incorporated fixes from Dennis Jeryd.
// Version 1.3 19 February 2005. Incorporated fixes from Howard J Oh and some of my own.
// Version 1.4 26 February 2005. Fixed stupid screw-up in code from 1.3.
// Version 1.5 18 November 2005. - Incorporated fixes from Andy Goodwin.
//											- Allows code page to be specified for reading/writing
//											- Properly calculates multibyte buffer size instead of
//												assuming lstrlen(s).
//											- Should handle UTF8 properly.
//
// Copyright David Pritchard 2003-2005. davidpritchard@ctv.es
//
// You can use this class freely, but please keep my ego happy 
// by leaving this comment in place.
//
//

#if !defined(AFX_STDIOFILEEX_H__41AFE3CA_25E0_482F_8B00_C40775BCDB81__INCLUDED_)
#define AFX_STDIOFILEEX_H__41AFE3CA_25E0_482F_8B00_C40775BCDB81__INCLUDED_

#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000

// Constants shared by the CStdioFileEx implementation.
#define	nUNICODE_BOM					0xFEFF		// Unicode "byte order mark": the first wchar_t of a Unicode file (checked by IsFileUnicode, written by WriteString)
#define	nUTF8_BOM						0xEFBBBF	// UTF-8 byte order mark (bytes EF BB BF) packed into one number; not referenced by the code shown here
#define	sNEWLINE						_T("\r\n")	// CR+LF line terminator that ReadString() strips from the end of a line
#define	sDEFAULT_UNICODE_FILLER_CHAR	"#"			// Substitute handed to WideCharToMultiByte() for characters that cannot be converted to the target code page

// Extension of MFC's CStdioFile whose line-based text I/O copes with ANSI/multibyte files,
// BOM-prefixed Unicode (wchar_t) files and BOM-prefixed UTF-8 files.
//
//	- When a file is opened for reading in text mode, its first bytes are probed for a
//	  byte-order mark and the result is exposed through IsFileUnicodeText()/IsFileUtf8Text().
//	- Adding modeWriteUnicode to the open flags makes WriteString() write wchar_t text.
//	- SetCodePage() selects the code page used for multibyte <-> Unicode conversions.
class CStdioFileEx : public CStdioFile
{
public:
	// Creates an object with no file attached; call Open() afterwards.
	CStdioFileEx();

	// Creates the object and opens lpszFileName with the given CFile open flags
	// (optionally combined with modeWriteUnicode).
	CStdioFileEx(LPCTSTR lpszFileName, UINT nOpenFlags);

	// Probes the file for a byte-order mark, adjusts the flags accordingly and opens the file.
	virtual BOOL	Open(LPCTSTR lpszFileName, UINT nOpenFlags, CFileException* pError = NULL);

	// Reads one line into rString, converting it to the build's character type and removing
	// the trailing line terminator. Returns FALSE when nothing could be read.
	virtual BOOL	ReadString(CString& rString);

	// Writes lpsz either as wchar_t text (modeWriteUnicode) or as multibyte text.
	virtual void	WriteString(LPCTSTR lpsz);

	// True when the file opened for reading started with the Unicode byte-order mark.
	// (const so that it can be queried through a const reference/pointer.)
	bool			IsFileUnicodeText() const { return m_bIsUnicodeText; }

	// True when the file opened for reading started with the UTF-8 byte-order mark (EF BB BF).
	bool			IsFileUtf8Text() const { return m_bIsUtf8Text; }

	// Size of the open file, minus the byte-order mark, divided by the size of one character
	// (sizeof(wchar_t) for Unicode files, one byte otherwise).
	unsigned long	GetCharCount();

	// Additional flag to allow Unicode text writing
	static const UINT modeWriteUnicode;

	// Sets the code page used when converting between file contents and in-memory strings.
	void			SetCodePage(IN const UINT nCodePage);

	// static utility functions

	// --------------------------------------------------------------------------------------------
	//
	//	CStdioFileEx::GetUnicodeStringFromMultiByteString()
	//
	// --------------------------------------------------------------------------------------------
	// Returns:    int - num. of chars written (0 means error)
	// Parameters:	char *		szMultiByteString	(IN)		Multi-byte input string
	//				wchar_t*	szUnicodeString		(OUT)		Unicode outputstring
	//				int			nUnicodeBufferSize	(IN)		Size of Unicode output buffer (chars) (IN)
	//				int			nCodePage			(IN)		Code page used to perform conversion
	//																Default = -1 (Get local code page).
	//
	// Purpose:		Gets a Unicode string from a MultiByte string.
	// Notes:		None.
	// Exceptions:	None.
	//
	static int		GetUnicodeStringFromMultiByteString(IN LPCSTR szMultiByteString, OUT wchar_t* szUnicodeString, IN int nUnicodeBufferSize, IN int nCodePage = -1);

	// --------------------------------------------------------------------------------------------
	//
	//	CStdioFileEx::GetMultiByteStringFromUnicodeString()
	//
	// --------------------------------------------------------------------------------------------
	// Returns:    int - number of characters written. 0 means error
	// Parameters:	wchar_t *	szUnicodeString			(IN)	Unicode input string
	//				char*		szMultiByteString		(OUT)	Multibyte output string
	//				int			nMultiByteBufferSize	(IN)	Multibyte buffer size
	//				int			nCodePage				(IN)	Code page used to perform conversion
	//																Default = -1 (Get local code page).
	//
	// Purpose:		Gets a MultiByte string from a Unicode string.
	// Notes:		None.
	// Exceptions:	None.
	//
	static int		GetMultiByteStringFromUnicodeString(wchar_t* szUnicodeString, char* szMultiByteString,
		int nMultiByteBufferSize, int nCodePage = -1);

	//---------------------------------------------------------------------------------------------------
	//
	//	CStdioFileEx::GetRequiredMultiByteLengthForUnicodeString()
	//
	//---------------------------------------------------------------------------------------------------
	// Returns:    int
	// Parameters: wchar_t * szUnicodeString,int nCodePage=-1
	//
	// Purpose:		Obtains the multi-byte buffer size needed to accommodate a converted Unicode string.
	//	Notes:		We can't assume that the buffer length is simply equal to the number of characters
	//					because that wouldn't accommodate multibyte characters!
	//
	static int		GetRequiredMultiByteLengthForUnicodeString(wchar_t* szUnicodeString, int nCodePage = -1);


	// --------------------------------------------------------------------------------------------
	//
	//	CStdioFileEx::IsFileUnicode()
	//
	// --------------------------------------------------------------------------------------------
	// Returns:    bool
	// Parameters: const CString& sFilePath
	//
	// Purpose:		Determines whether a file is Unicode by reading the first character and detecting
	//					whether it's the Unicode byte marker.
	// Notes:		None.
	// Exceptions:	None.
	//
	static bool IsFileUnicode(const CString& sFilePath);

	// --------------------------------------------------------------------------------------------
	//
	//	CStdioFileEx::IsFileUtf8()
	//
	// --------------------------------------------------------------------------------------------
	// Returns:    bool
	// Parameters: const CString& sFilePath
	//
	// Purpose:		Determines whether a file is UTF-8 by reading its first three bytes and detecting
	//					whether they are the UTF-8 byte-order mark (EF BB BF).
	// Notes:		None.
	// Exceptions:	None.
	//
	static bool IsFileUtf8(const CString& sFilePath);

	// Returns the default ANSI code page of the current user's locale (0 if it cannot be obtained).
	static UINT	GetCurrentLocaleCodePage();

protected:
	// Examines the file and the requested open flags, updates the members below and returns
	// the flags to hand to the base class.
	UINT	ProcessFlags(const CString& sFilePath, UINT& nOpenFlags);

	bool	m_bIsUnicodeText;	// file being read starts with the Unicode byte-order mark
	bool	m_bIsUtf8Text;		// file being read starts with the UTF-8 byte-order mark
	UINT	m_nFlags;			// open flags as recorded by ProcessFlags()
	int		m_nFileCodePage;	// conversion code page; -1 means "use the system default"
};

#endif // !defined(AFX_STDIOFILEEX_H__41AFE3CA_25E0_482F_8B00_C40775BCDB81__INCLUDED_)

源码定义(CStdioFileEx.cpp):

// StdioFileEx.cpp: implementation of the CStdioFileEx class.
//
// Version 1.1 23 August 2003.	Incorporated fixes from Dennis Jeryd.
// Version 1.3 19 February 2005. Incorporated fixes from Howard J Oh and some of my own.
// Version 1.4 26 February 2005. Fixed stupid screw-up in code from 1.3.
// Version 1.5 18 November 2005. - Incorporated fixes from Andy Goodwin.
//											- Allows code page to be specified for reading/writing
//											- Properly calculates multibyte buffer size instead of
//												assuming lstrlen(s).
//											- Should handle UTF8 properly.
//
// Copyright David Pritchard 2003-2005. davidpritchard@ctv.es
//
// You can use this class freely, but please keep my ego happy 
// by leaving this comment in place.
//
//

#include "stdafx.h"
#include "CStdioFileEx.h"

#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#define new DEBUG_NEW
#endif

//
// Construction/Destruction
//

// Extra open flag defined by this class: OR it into the open flags to make WriteString()
// write wchar_t text preceded by the nUNICODE_BOM byte-order mark.
// NOTE(review): 0x20000 may coincide with CFile::osWriteThrough in newer MFC headers --
// confirm against afx.h before relying on either meaning.
/*static*/ const UINT CStdioFileEx::modeWriteUnicode = 0x20000; // Add this flag to write in Unicode

// Default constructor: no file is attached yet. The object starts out assuming plain
// (non-Unicode, non-UTF-8) text, text-mode read/write flags and no explicit code page (-1).
CStdioFileEx::CStdioFileEx()
	: CStdioFile(),
	  m_bIsUnicodeText(false),
	  m_bIsUtf8Text(false),
	  m_nFlags(CFile::typeText | CFile::modeReadWrite),
	  m_nFileCodePage(-1)
{
}

// Opening constructor.
//
// Parameters:	lpszFileName	Path of the file to open.
//				nOpenFlags		CFile open flags, optionally combined with modeWriteUnicode.
//
// ProcessFlags() runs while the base-class initializer is evaluated, so the byte-order-mark
// flags it sets (m_bIsUnicodeText / m_bIsUtf8Text) are already valid in the constructor body.
CStdioFileEx::CStdioFileEx(LPCTSTR lpszFileName, UINT nOpenFlags)
	:CStdioFile(lpszFileName, ProcessFlags(lpszFileName, nOpenFlags))
{
	// Keep this in step with Open(): a file that starts with the UTF-8 byte-order mark must be
	// converted with CP_UTF8. Any other file uses the system default code page (-1) until the
	// caller chooses one with SetCodePage().
	m_nFileCodePage = m_bIsUtf8Text ? (int)CP_UTF8 : -1;
}

// Selects the code page used for multibyte <-> Unicode conversion when reading and writing.
// The value is kept as an int because -1 is reserved by the conversion helpers to mean
// "use the system default code page".
void CStdioFileEx::SetCodePage(IN const UINT nCodePage)
{
	m_nFileCodePage = static_cast<int>(nCodePage);
}

// Opens lpszFileName after letting ProcessFlags() probe it for a byte-order mark and adjust
// nOpenFlags. A detected UTF-8 mark switches the conversion code page to CP_UTF8.
// Returns the result of CStdioFile::Open(); failure details go to pError when supplied.
BOOL CStdioFileEx::Open(LPCTSTR lpszFileName, UINT nOpenFlags, CFileException* pError /*=NULL*/)
{
	// Detect Unicode / UTF-8 content; may rewrite nOpenFlags (passed by reference)
	ProcessFlags(lpszFileName, nOpenFlags);

	if (m_bIsUtf8Text)
	{
		m_nFileCodePage = (int)CP_UTF8;
	}

	const BOOL bOpened = CStdioFile::Open(lpszFileName, nOpenFlags, pError);
	return bOpened;
}

// --------------------------------------------------------------------------------------------
//
//	CStdioFileEx::ReadString()
//
// --------------------------------------------------------------------------------------------
// Returns:    BOOL - TRUE if a line was read, FALSE at end of file or when no file is open
// Parameters: CString& rString	(OUT)	Receives the line without its trailing line terminator
//
// Purpose:		Reads one line and converts it to the character type of the build:
//					- Unicode build, Unicode file:	read directly by the base class
//					- Unicode build, other file:		read bytes, convert using the file code page
//					- ANSI build, other file:		read by the base class, then converted file code
//													page -> Unicode -> locale code page if they differ
//					- ANSI build, Unicode file:		read wide chars, convert using the file code page
// Notes:		A byte-order mark at the start of the file (Unicode or UTF-8) is skipped.
//				In the conversion paths a line is read in pieces of at most 4095 characters.
// Exceptions:	Whatever the underlying file operations throw; temporary buffers are released first.
//
BOOL CStdioFileEx::ReadString(CString& rString)
{
	const int	nMAX_LINE_CHARS = 4096;
	BOOL		bReadData = FALSE;
	LPTSTR		lpsz;
	int			nLen = 0;
	wchar_t*	pszUnicodeString = NULL;
	char*		pszMultiByteString = NULL;
	int			nChars = 0;

	// Never leave the caller's previous contents in place (matters on EOF or when a
	// conversion yields nothing, because the line-terminator stripping below acts on rString).
	rString.Empty();

	// Without an open stream there is nothing to read; do not hand a null stream to the CRT.
	ASSERT(m_pStream != NULL);
	if (m_pStream == NULL)
	{
		return FALSE;
	}

	try
	{
		// If at position 0, discard the byte-order mark before reading
		if (GetPosition() == 0)
		{
			if (m_bIsUnicodeText)
			{
				wchar_t	cUnicodeBOM;
				Read(&cUnicodeBOM, sizeof(wchar_t));
			}
			else if (m_bIsUtf8Text)
			{
				// The UTF-8 mark is three bytes long (EF BB BF)
				unsigned char abUtf8BOM[3];
				Read(abUtf8BOM, sizeof(abUtf8BOM));
			}
		}

		// If compiled for Unicode
#ifdef _UNICODE
		if (m_bIsUnicodeText)
		{
			// Do standard stuff - Unicode to Unicode. Seems to work OK.
			bReadData = CStdioFile::ReadString(rString);
		}
		else
		{
			pszUnicodeString = new wchar_t[nMAX_LINE_CHARS];
			pszMultiByteString = new char[nMAX_LINE_CHARS];

			// Initialise to something safe
			memset(pszUnicodeString, 0, sizeof(wchar_t) * nMAX_LINE_CHARS);
			memset(pszMultiByteString, 0, sizeof(char) * nMAX_LINE_CHARS);

			// Read the string
			bReadData = (NULL != fgets(pszMultiByteString, nMAX_LINE_CHARS, m_pStream));

			if (bReadData)
			{
				// Convert multibyte to Unicode, using the specified code page
				nChars = GetUnicodeStringFromMultiByteString(pszMultiByteString, pszUnicodeString, nMAX_LINE_CHARS, m_nFileCodePage);

				if (nChars > 0)
				{
					rString = (CString)pszUnicodeString;
				}
			}
		}
#else

		if (!m_bIsUnicodeText)
		{
			// Do standard stuff -- read ANSI in ANSI
			bReadData = CStdioFile::ReadString(rString);

			// Get the current code page
			UINT nLocaleCodePage = GetCurrentLocaleCodePage();

			// If we got it OK...
			if (nLocaleCodePage > 0)
			{
				// if file code page does not match the system code page, we need to do a double conversion!
				if (nLocaleCodePage != (UINT)m_nFileCodePage)
				{
					int nStringBufferChars = rString.GetLength() + 1;

					pszUnicodeString = new wchar_t[nStringBufferChars];

					// Initialise to something safe
					memset(pszUnicodeString, 0, sizeof(wchar_t) * nStringBufferChars);

					// Convert to Unicode using the file code page
					nChars = GetUnicodeStringFromMultiByteString(rString, pszUnicodeString, nStringBufferChars, m_nFileCodePage);

					// Convert back to multibyte using the system code page
					// (This doesn't really confer huge advantages except to avoid "mangling" of non-convertible special
					// characters. So, if a file in the E.European code page is displayed on a system using the 
					// western European code page, special accented characters which the system cannot display will be
					// replaced by the default character (a hash or something), rather than being incorrectly mapped to
					// other, western European accented characters).
					if (nChars > 0)
					{
						// Calculate how much we need for the MB buffer (it might be larger)
						nStringBufferChars = GetRequiredMultiByteLengthForUnicodeString(pszUnicodeString, nLocaleCodePage);
						pszMultiByteString = new char[nStringBufferChars];

						nChars = GetMultiByteStringFromUnicodeString(pszUnicodeString, pszMultiByteString, nStringBufferChars, nLocaleCodePage);
						rString = (CString)pszMultiByteString;
					}
				}
			}
		}
		else
		{
			pszUnicodeString = new wchar_t[nMAX_LINE_CHARS];

			// Initialise to something safe
			memset(pszUnicodeString, 0, sizeof(wchar_t) * nMAX_LINE_CHARS);

			// Read as Unicode, convert to ANSI

			// Bug fix by Dennis Jeryd 06/07/2003: initialise bReadData
			bReadData = (NULL != fgetws(pszUnicodeString, nMAX_LINE_CHARS, m_pStream));

			if (bReadData)
			{
				// Calculate how much we need for the multibyte string
				int nRequiredMBBuffer = GetRequiredMultiByteLengthForUnicodeString(pszUnicodeString, m_nFileCodePage);
				pszMultiByteString = new char[nRequiredMBBuffer];

				nChars = GetMultiByteStringFromUnicodeString(pszUnicodeString, pszMultiByteString, nRequiredMBBuffer, m_nFileCodePage);

				if (nChars > 0)
				{
					rString = (CString)pszMultiByteString;
				}
			}

		}
#endif

		// Then remove the end-of-line characters: "\r\n" as a pair, or a single '\r' / '\n'
		if (bReadData)
		{
			// Copied from FileTxt.cpp but adapted to Unicode and then adapted for end-of-line being just '\r'. 
			nLen = rString.GetLength();
			if (nLen > 1 && rString.Mid(nLen - 2) == sNEWLINE)
			{
				rString.GetBufferSetLength(nLen - 2);
			}
			else
			{
				lpsz = rString.GetBuffer(0);
				if (nLen != 0 && (lpsz[nLen - 1] == _T('\r') || lpsz[nLen - 1] == _T('\n')))
				{
					rString.GetBufferSetLength(nLen - 1);
				}
			}
		}
	}
	// Ensure we always delete in case of exception
	catch (...)
	{
		delete[] pszUnicodeString;
		delete[] pszMultiByteString;

		throw;
	}

	// delete[] on a null pointer is a no-op, so no checks are needed
	delete[] pszUnicodeString;
	delete[] pszMultiByteString;

	return bReadData;
}



// --------------------------------------------------------------------------------------------
//
//	CStdioFileEx::WriteString()
//
// --------------------------------------------------------------------------------------------
// Returns:    void
// Parameters: LPCTSTR lpsz	(IN)	Null-terminated text to write (no line terminator is appended)
//
// Purpose:		Writes string to file either in Unicode or multibyte, depending on whether the caller specified the
//					CStdioFileEx::modeWriteUnicode flag. Override of base class function.
// Notes:		If writing in Unicode we need to:
//						a) Write the Byte-order-mark at the beginning of the file
//						b) Write all strings in byte-mode
//					-	If we were compiled in Unicode, we need to convert Unicode to multibyte if 
//						we want to write in multibyte
//					-	If we were compiled in multi-byte, we need to convert multibyte to Unicode if 
//						we want to write in Unicode.
//					-	All output goes through CFile::Write(), i.e. the bytes are written as they are.
//					-	An empty string writes nothing (apart from the byte-order mark when due).
// Exceptions:	Whatever CFile::Write() throws; temporary buffers are released first.
//
void CStdioFileEx::WriteString(LPCTSTR lpsz)
{
	wchar_t* pszUnicodeString = NULL;
	char* pszMultiByteString = NULL;

	try
	{
		// If writing Unicode and at the start of the file, need to write byte mark
		if (m_nFlags & CStdioFileEx::modeWriteUnicode)
		{
			// If at position 0, write byte-order mark before writing anything else.
			// The m_pStream test comes first so that GetPosition() is never asked about a null stream.
			if (!m_pStream || GetPosition() == 0)
			{
				wchar_t cBOM = (wchar_t)nUNICODE_BOM;
				CFile::Write(&cBOM, sizeof(wchar_t));
			}
		}

		// If compiled in Unicode...
#ifdef _UNICODE

		// If writing Unicode, no conversion needed
		if (m_nFlags & CStdioFileEx::modeWriteUnicode)
		{
			// Write in byte mode
			CFile::Write(lpsz, lstrlen(lpsz) * sizeof(wchar_t));
		}
		// Else if we don't want to write Unicode, need to convert
		else
		{
			// The conversion helpers take a non-const pointer but only read from it,
			// so the input can be passed directly instead of being copied first.
			wchar_t* pszSource = const_cast<wchar_t*>(lpsz);

			// Work out how much space we need for the multibyte conversion
			int nBufferSize = GetRequiredMultiByteLengthForUnicodeString(pszSource, m_nFileCodePage);
			pszMultiByteString = new char[nBufferSize];

			// Get multibyte string
			int nCharsWritten = GetMultiByteStringFromUnicodeString(pszSource, pszMultiByteString, nBufferSize, m_nFileCodePage);

			if (nCharsWritten > 0)
			{
				// Do byte-mode write using actual chars written (fix by Howard J Oh)
				CFile::Write((const void*)pszMultiByteString,
					nCharsWritten * sizeof(char));
			}
		}
		// Else if *not* compiled in Unicode
#else
		// If writing Unicode, need to convert
		if (m_nFlags & CStdioFileEx::modeWriteUnicode)
		{
			// One wide character per input byte is always enough; +1 for the terminator
			int		nChars = lstrlen(lpsz) + 1;
			int		nCharsWritten = 0;

			pszUnicodeString = new wchar_t[nChars];

			nCharsWritten = GetUnicodeStringFromMultiByteString(lpsz, pszUnicodeString, nChars, m_nFileCodePage);

			if (nCharsWritten > 0)
			{
				// Do byte-mode write using actual chars written (fix by Howard J Oh)
				CFile::Write(pszUnicodeString, nCharsWritten * sizeof(wchar_t));
			}
			else
			{
				// Zero characters is only legitimate for an empty input string;
				// anything else means the conversion failed.
				ASSERT(nChars == 1);
			}

		}
		// Else if we don't want to write Unicode, no conversion needed, unless the code page differs
		else
		{
			// Get the current code page
			UINT nLocaleCodePage = GetCurrentLocaleCodePage();

			// If we got it OK, and if file code page does not match the system code page, we need to do a double conversion!
			if (nLocaleCodePage > 0 && nLocaleCodePage != (UINT)m_nFileCodePage)
			{
				int	nChars = lstrlen(lpsz) + 1;	 // +1 for the terminator

				pszUnicodeString = new wchar_t[nChars];

				// Initialise to something safe
				memset(pszUnicodeString, 0, sizeof(wchar_t) * nChars);

				// Convert to Unicode using the locale code page (the code page we are using in memory)
				nChars = GetUnicodeStringFromMultiByteString(lpsz, pszUnicodeString, nChars, nLocaleCodePage);

				// Convert back to multibyte using the file code page
				// (Note that you can't reliably read a non-Unicode file written in code page A on a system using a code page B,
				// modify the file and write it back using code page A, unless you disable all this double-conversion code.
				// In effect, you have to choose between a mangled character display and mangled file writing).
				if (nChars > 0)
				{
					// Calculate how much we need for the MB buffer (it might be larger)
					nChars = GetRequiredMultiByteLengthForUnicodeString(pszUnicodeString, m_nFileCodePage);

					pszMultiByteString = new char[nChars];
					memset(pszMultiByteString, 0, sizeof(char) * nChars);

					nChars = GetMultiByteStringFromUnicodeString(pszUnicodeString, pszMultiByteString, nChars, m_nFileCodePage);

					// Do byte-mode write. This avoids annoying "interpretation" of \n's as \r\n
					CFile::Write((const void*)pszMultiByteString, nChars * sizeof(char));
				}
			}
			else
			{
				// Do byte-mode write. This avoids annoying "interpretation" of \n's as \r\n
				CFile::Write((const void*)lpsz, lstrlen(lpsz) * sizeof(char));
			}
		}

#endif
	}
	// Ensure we always clean up
	catch (...)
	{
		delete[] pszUnicodeString;
		delete[] pszMultiByteString;
		throw;
	}

	// delete[] on a null pointer is a no-op, so no checks are needed
	delete[] pszUnicodeString;
	delete[] pszMultiByteString;
}

// --------------------------------------------------------------------------------------------
//
//	CStdioFileEx::ProcessFlags()
//
// --------------------------------------------------------------------------------------------
// Returns:    UINT - the flags to pass to the base class (same value as nOpenFlags on exit)
// Parameters: const CString&	sFilePath	(IN)		File about to be opened
//				UINT&			nOpenFlags	(IN/OUT)	Requested flags; rewritten for the base class
//
// Purpose:		Resets and re-detects m_bIsUnicodeText / m_bIsUtf8Text, switches Unicode files to
//					binary mode, and records the requested flags (including modeWriteUnicode) in m_nFlags.
// Notes:		modeWriteUnicode belongs to this class only, so it is removed from the flags handed
//					to the base class; WriteString() consults m_nFlags, which keeps it.
//					NOTE(review): in MFC versions where CFile::osWriteThrough shares the value 0x20000
//					the flag would otherwise be taken as a write-through request -- confirm against afx.h.
// Exceptions:	None.
//
UINT CStdioFileEx::ProcessFlags(const CString& sFilePath, UINT& nOpenFlags)
{
	m_bIsUnicodeText = false;
	m_bIsUtf8Text = false;

	// If we have writeUnicode we must have write or writeRead as well
#ifdef _DEBUG
	if (nOpenFlags & CStdioFileEx::modeWriteUnicode)
	{
		ASSERT(nOpenFlags & CFile::modeWrite || nOpenFlags & CFile::modeReadWrite);
	}
#endif

	// If reading in text mode and not creating... ; fixed by Dennis Jeryd 6/8/03
	if ((nOpenFlags & CFile::typeText) && !(nOpenFlags & CFile::modeCreate) && !(nOpenFlags & CFile::modeWrite))
	{
		m_bIsUnicodeText = IsFileUnicode(sFilePath);

		// A file cannot start with both byte-order marks, so only probe for UTF-8
		// when the Unicode mark was not found (saves opening the file a second time).
		m_bIsUtf8Text = !m_bIsUnicodeText && IsFileUtf8(sFilePath);

		// If it's Unicode, switch to binary mode
		if (m_bIsUnicodeText)
		{
			nOpenFlags &= ~(UINT)CFile::typeText;
			nOpenFlags |= CFile::typeBinary;
		}
	}

	// Remember everything the caller asked for, including our private flag...
	m_nFlags = nOpenFlags;

	// ...but do not forward the private flag to the base class.
	nOpenFlags &= ~CStdioFileEx::modeWriteUnicode;

	return nOpenFlags;
}

// --------------------------------------------------------------------------------------------
//
//	CStdioFileEx::IsFileUnicode()
//
// --------------------------------------------------------------------------------------------
// Returns:    bool - true if the file starts with the Unicode byte-order mark
// Parameters: const CString& sFilePath
//
// Purpose:		Determines whether a file is Unicode by reading the first character and detecting
//					whether it's the Unicode byte marker.
// Notes:		Returns false if the file cannot be opened or is shorter than one wchar_t.
// Exceptions:	None raised here. NOTE(review): CFile::Read may throw on an I/O error -- confirm.
//
/*static*/ bool CStdioFileEx::IsFileUnicode(const CString& sFilePath)
{
	CFile			file;
	CFileException	exFile;
	wchar_t			cFirstChar = 0;

	// Open file in binary mode; a file that cannot be opened is simply "not Unicode"
	if (!file.Open(sFilePath, CFile::typeBinary | CFile::modeRead, &exFile))
	{
		return false;
	}

	// Require a complete wchar_t: after a short read part of cFirstChar would not come
	// from the file, so it must not be compared against the byte-order mark.
	const bool bIsUnicode =
		file.Read(&cFirstChar, sizeof(wchar_t)) == sizeof(wchar_t)
		&& cFirstChar == (wchar_t)nUNICODE_BOM;

	file.Close();

	return bIsUnicode;
}

// --------------------------------------------------------------------------------------------
//
//	CStdioFileEx::IsFileUtf8()
//
// --------------------------------------------------------------------------------------------
// Returns:    bool - true if the file starts with the UTF-8 byte-order mark (EF BB BF)
// Parameters: const CString& sFilePath
//
// Purpose:		Determines whether a file is UTF-8 by reading its first three bytes and comparing
//					them with the UTF-8 byte-order mark.
// Notes:		Returns false if the file cannot be opened or is shorter than three bytes.
//				UTF-8 files without a byte-order mark are not recognised.
// Exceptions:	None raised here. NOTE(review): CFile::Read may throw on an I/O error -- confirm.
//
/*static*/ bool CStdioFileEx::IsFileUtf8(const CString& sFilePath)
{
	CFile			file;
	CFileException	exFile;
	unsigned char	abFirstBytes[3] = { 0, 0, 0 };

	// Open file in binary mode; a file that cannot be opened is simply "not UTF-8"
	if (!file.Open(sFilePath, CFile::typeBinary | CFile::modeRead, &exFile))
	{
		return false;
	}

	// All three bytes must really have been read before they are compared
	const bool bIsUtf8 =
		file.Read(abFirstBytes, sizeof(abFirstBytes)) == sizeof(abFirstBytes)
		&& abFirstBytes[0] == 0xEF
		&& abFirstBytes[1] == 0xBB
		&& abFirstBytes[2] == 0xBF;

	file.Close();

	return bIsUtf8;
}

// --------------------------------------------------------------------------------------------
//
//	CStdioFileEx::GetCharCount()
//
// --------------------------------------------------------------------------------------------
// Returns:    unsigned long - character count, 0 if no file is open
// Parameters: None
//
// Purpose:		Derives the number of characters from the file length: the byte-order mark is
//					subtracted and the rest is divided by the character size (sizeof(wchar_t) for
//					Unicode files, 1 otherwise).
// Notes:		For non-Unicode files (including UTF-8) the result is therefore a byte count.
//				The file length is narrowed to unsigned long.
// Exceptions:	None.
//
unsigned long CStdioFileEx::GetCharCount()
{
	unsigned long nCharCount = 0;

	if (m_pStream)
	{
		// Get size of chars in file
		const unsigned long nCharSize = m_bIsUnicodeText ? sizeof(wchar_t) : sizeof(char);

		// Size of the byte-order mark that must not be counted
		unsigned long nBomBytes = 0;

		if (m_bIsUnicodeText)
		{
			nBomBytes = sizeof(wchar_t);
		}
		else if (m_bIsUtf8Text)
		{
			nBomBytes = 3;
		}

		unsigned long nByteCount = (unsigned long)GetLength();

		// Unsigned arithmetic: never subtract more than is there, or the count would wrap round
		nByteCount = (nByteCount > nBomBytes) ? (nByteCount - nBomBytes) : 0;

		// Calc chars
		nCharCount = nByteCount / nCharSize;
	}

	return nCharCount;
}

// Get the current user's default ANSI code page.
// Asks GetLocaleInfo() for LOCALE_IDEFAULTANSICODEPAGE of LOCALE_USER_DEFAULT and converts the
// returned text to a number.
// Returns 0 when the query fails or the locale reports no ANSI code page (e.g. a Unicode-only
// locale); what multibyte applications should do in that case is left open.
UINT CStdioFileEx::GetCurrentLocaleCodePage()
{
	_TCHAR	szCodePageText[10];
	const int nCopied = ::GetLocaleInfo(LOCALE_USER_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE, szCodePageText, 10);

	// Query failed
	if (nCopied <= 0)
	{
		ASSERT(false);
		return 0;
	}

	const UINT nCodePage = (UINT)_ttoi(szCodePageText);
	ASSERT(nCodePage > 0);

	return nCodePage;
}

// --------------------------------------------------------------------------------------------
//
//	CStdioFileEx::GetUnicodeStringFromMultiByteString()
//
// --------------------------------------------------------------------------------------------
// Returns:    int - num. of chars written, not counting the terminator (0 means error or empty input)
// Parameters:	char *		szMultiByteString	(IN)		Multi-byte input string (null-terminated)
//				wchar_t*	szUnicodeString		(OUT)		Unicode outputstring
//				int			nUnicodeBufferSize	(IN)		Size of Unicode output buffer in chars(IN)
//				int			nCodePage			(IN)		Code page used to perform conversion
//																Default = -1 (Get local code page).
//
// Purpose:		Gets a Unicode string from a MultiByte string.
// Notes:		Nothing is done when either pointer is NULL or the buffer size is not positive.
// Exceptions:	None.
//
int CStdioFileEx::GetUnicodeStringFromMultiByteString(IN LPCSTR szMultiByteString, OUT wchar_t* szUnicodeString, IN int nUnicodeBufferSize, IN int nCodePage)
{
	int		nCharsWritten = 0;

	// Same guard as GetMultiByteStringFromUnicodeString(): a non-positive size must neither
	// reach memset() (a negative count becomes a huge size_t) nor MultiByteToWideChar(), which
	// treats 0 as "tell me the required size" and writes nothing.
	if (nUnicodeBufferSize > 0 && szUnicodeString && szMultiByteString)
	{
		// If no code page specified, take default for system
		if (nCodePage == -1)
		{
			nCodePage = GetACP();
		}

		try
		{
			// Zero out buffer first. NB: nUnicodeBufferSize is NUMBER OF CHARS, NOT BYTES!
			memset((void*)szUnicodeString, '\0', sizeof(wchar_t) *
				nUnicodeBufferSize);

			// When converting from UTF8, don't set any flags (see Q175392).
			nCharsWritten = MultiByteToWideChar((UINT)nCodePage,
				(nCodePage == CP_UTF8 ? 0 : MB_PRECOMPOSED), // Flags
				szMultiByteString, -1, szUnicodeString, nUnicodeBufferSize);
		}
		catch (...)
		{
			TRACE(_T("Controlled exception in MultiByteToWideChar!\n"));
		}
	}

	// The input length was given as -1, so the count includes the terminating null; drop it
	if (nCharsWritten > 0)
	{
		nCharsWritten--;
	}

	return nCharsWritten;
}

// --------------------------------------------------------------------------------------------
//
//	CStdioFileEx::GetMultiByteStringFromUnicodeString()
//
// --------------------------------------------------------------------------------------------
// Returns:    int - number of characters written. 0 means error
// Parameters:	wchar_t *	szUnicodeString			(IN)	Unicode input string
//				char*		szMultiByteString		(OUT)	Multibyte output string
//				int			nMultiByteBufferSize	(IN)	Multibyte buffer size (chars)
//				UINT		nCodePage				(IN)	Code page used to perform conversion
//																Default = -1 (Get local code page).
//
// Purpose:		Gets a MultiByte string from a Unicode string
// Notes:		Added fix by Andy Goodwin: make buffer into int.
// Exceptions:	None.
//
int CStdioFileEx::GetMultiByteStringFromUnicodeString(wchar_t* szUnicodeString, char* szMultiByteString,
	int nMultiByteBufferSize, int nCodePage)
{
	BOOL	bUsedDefChar = FALSE;
	int		nCharsWritten = 0;

	// Fix by Andy Goodwin: don't do anything if buffer is 0
	if (nMultiByteBufferSize > 0)
	{
		if (szUnicodeString && szMultiByteString)
		{
			// Zero out buffer first
			memset((void*)szMultiByteString, '\0', nMultiByteBufferSize);

			// If no code page specified, take default for system
			if (nCodePage == -1)
			{
				nCodePage = GetACP();
			}

			try
			{
				// If writing to UTF8, flags, default char and boolean flag must be NULL
				nCharsWritten = WideCharToMultiByte((UINT)nCodePage,
					(nCodePage == CP_UTF8 ? 0 : WC_COMPOSITECHECK | WC_SEPCHARS), // Flags
					szUnicodeString, -1,
					szMultiByteString,
					nMultiByteBufferSize,
					(nCodePage == CP_UTF8 ? NULL : sDEFAULT_UNICODE_FILLER_CHAR),	// Filler char
					(nCodePage == CP_UTF8 ? NULL : &bUsedDefChar));						// Did we use filler char?

				// If no chars were written and the buffer is not 0, error!
				if (nCharsWritten == 0 && nMultiByteBufferSize > 0)
				{
					TRACE1("Error in WideCharToMultiByte: %d\n", ::GetLastError());
				}
			}
			catch (...)
			{
				TRACE(_T("Controlled exception in WideCharToMultiByte!\n"));
			}
		}
	}

	// Now fix nCharsWritten 
	if (nCharsWritten > 0)
	{
		nCharsWritten--;
	}

	return nCharsWritten;
}

//---------------------------------------------------------------------------------------------------
//
//	CStdioFileEx::GetRequiredMultiByteLengthForUnicodeString()
//
//---------------------------------------------------------------------------------------------------
// Returns:    int - value reported by WideCharToMultiByte() for a size query (0 on failure)
// Parameters: wchar_t * szUnicodeString	(IN)	Unicode input string (null-terminated)
//				int nCodePage=-1			(IN)	Target code page; -1 = system default
//
// Purpose:		Asks WideCharToMultiByte() how large a multibyte buffer must be to hold the
//					converted string, by calling it with no output buffer and a size of 0.
//	Notes:		The character count of the input is not a safe estimate, because one Unicode
//					character may need several bytes.
//
/*static*/ int CStdioFileEx::GetRequiredMultiByteLengthForUnicodeString(wchar_t* szUnicodeString, int nCodePage /*=-1*/)
{
	int nBytesNeeded = 0;

	try
	{
		// -1 selects the system default code page
		if (nCodePage == -1)
		{
			nCodePage = GetACP();
		}

		const bool bIsUtf8 = (nCodePage == CP_UTF8);

		// For UTF8 the flags and the filler char must be 0/NULL
		nBytesNeeded = WideCharToMultiByte((UINT)nCodePage,
			(bIsUtf8 ? 0 : WC_COMPOSITECHECK | WC_SEPCHARS),
			szUnicodeString, -1,
			NULL,
			0,	// size query only: nothing is written
			(bIsUtf8 ? NULL : sDEFAULT_UNICODE_FILLER_CHAR),
			NULL);
	}
	catch (...)
	{
		TRACE(_T("Controlled exception in WideCharToMultiByte!\n"));
	}

	return nBytesNeeded;
}

[*注]:

1、源码摘自互联网,由于具体出处未知,所以未引用源链接地址,如若侵犯版权,请联系博主!

2、源码在原文基础上,添加了对UTF-8 BOM文件类型的支持。

 

本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系:hwhale#tublm.com(使用前将#替换为@)

CStdioFile扩展(支持Ansi、Unicode、Utf-8等文本格式) 的相关文章

  • VC++ GDI+将CDC保存为图片文件(bmp、jpg、png)

    int GetEncoderClsid const wchar t format CLSID pClsid UINT num 0 UINT size 0 ImageCodecInfo pImageCodecInfo NULL GetImag
  • VC++ MapWinGis篇(创建图层)

    MapWinGis控件的引用 import MapWinGIS ocx rename IImage gisIImage rename ImageType gisImageType rename Point gisPoint using na
  • 多语言版本 UI资源切换

    1 如果窗体资源和源码里面所使用的字符串资源 都已经存在于资源视图里面 则通过下面方式来实现 LANGID lid GetSystemDefaultLangID if 0x0809 lid 英语 0x0809 SetThreadUILang
  • VC++ 控制台程序常用接口

    1 申请一个控制台窗口 如果程序本身是控制台程序 可以不用调用 AllocConsole 2 获取控制台屏幕缓冲区的句柄 HANDLE m hConsole GetStdHandle STD OUTPUT HANDLE 3 设置控制台标题
  • VC++ 图像颜色调节

    1 BMP图片在GDI方式下贴图 32位位图 半透明像素会显示黑色或白底 像素处理代码 void CrossImage CImage img if img IsNull return 确认该图像包含Alpha通道 if img GetBPP
  • GPAC MP4文件写入(支持H264、H265)

    1 GPAC模块下载链接https github com gpac gpac或https gpac wp imt fr downloads 2 编译指导https github com gpac gpac wiki Build Introd
  • 多语言版本 OPENFILENAME过滤器设置问题

    正常的时候 打开并选择一个文件这样写代码 char szFileName MAX PATH 0 OPENFILENAME ofn memset szFileName 0 MAX PATH memset ofn 0 sizeof ofn of
  • OpenGL GLFW入门篇 - 画点集

    效果图 主体代码 void DrawPoints void int i GLfloat x y glPushMatrix 另一个相对的Z平移可以分离对象 glLoadIdentity glTranslatef 0 0 0 0 0 f 设置点
  • C++ 结束进程

    有时候进程未正常退出 导致进程列表遗留僵尸进程 程序启动需要杀死这种僵尸进程 include TLHELP32 H void TerminateSelfApplication TCHAR szFileName MAX PATH 0 TCHA
  • C++ 内存共享/软件守护

    功能描述 在无人看守的情况下 防止软件意外退出 接口调用简单 只需要简单声明就行 int main int argc char argv QShareProcess shareProcess 共享内存名称随意 下面实现自己的主体代码即可 类
  • C++ 标准库中数据类型转换

    头文件引用
  • TightVNC H264编解码(二)之硬编码库的编译

    AVCodec codec avcodec find encoder by name nvenc h264 如果是默认的ffmpeg库 返回结果是NULL 看来是不带有硬编码功能的 重新编译分支ffnvcodec 不到半个小时编译完成 返回
  • VC++ OpenCV4.x二维码识别

    自OpenCV4 x开始 二维码识别已经悄然进入 再也不用看zbar脸色了 以下是官网发布的源码 include opencv2 objdetect hpp include opencv2 imgproc hpp include openc
  • VC++ CSWDirectoryListCtrl磁盘文件列表

    效果图 头文件定义 CSWDirectoryListCtrl h pragma once include afxwin h include
  • Node.js EventEmitter事件

    Node js EventEmitter Node js 所有的异步 I O 操作在完成时都会发送一个事件到事件队列 Node js 里面的许多对象都会分发事件 一个 net Server 对象会在每次有新连接时触发一个事件 一个 fs r
  • OpenSSL RSA加密和解密

    rsa加密的密钥格式常见的有两种 一种是PKCS 1 密钥头为 BEGIN RSA PUBLIC KEY 一种是PKCS 8 密钥头为 BEGIN PUBLIC KEY 以字符串公钥为例 对PKCS 1格式的密钥加载使用的函数是PEM re
  • VC++ Gdiplus::Bitmap抠图

    void SetBackgroundMaskImage LPCTSTR lpszImageName COLORREF crAlpha RGB 255 0 0 Gdiplus Bitmap pBkgrMaskBitmap CImageFact
  • EAN13条形码绘制(Delphi版)

    单元声明 unit UnitEAN https wenku baidu com view d61eec0dc4da50e2524de518964bcf84b9d52d0d html 共有95 18 113条数据模块 1表示黑 0表示白 左侧
  • C++ xml库的选择

    自从触及xml文件的读写 一直以来都是用的tinyxml2 接口简单 然而近期项目频繁出错 跟踪调试发现 问题出在了xml文件的读写上 当节点数超过百万级别的时候 内存暴增到G的当量 很显然程序会由于内存申请不足崩掉了 果断寻找替代品 百度
  • 粤嵌GEC6818开发板-入门感慨篇(续一)

    触摸屏的事件 include

随机推荐

  • 爬虫逆向(js逆向)

    异步爬虫的实现方式 线程池 多任务的异步协程 多线程 生产者消费者模型 线程池 前提 from flask import Flask render template from time import sleep app Flask name
  • 【NLP】文本聚类和主题建模

    大家好 我是Sonhhxg 柒 希望你看完之后 能对你有所帮助 不足请指正 共同学习交流 个人主页 Sonhhxg 柒的博客 CSDN博客 欢迎各位 点赞 收藏 留言 系列专栏 机器学习 ML 自然语言处理 NLP 深度学习 DL fore
  • Chrome浏览器更新之后在开发者工具中查看格式化后的js不显示行号问题

    最近更新了谷歌浏览器 然后在调试代码的时候发现一个问题 就是当js代码是压缩后的 将其格式化之后就只显示压缩之前的行号了 如下 未格式化的 格式化之后 这样就很无语了 突然没有了行号就很不习惯了 经过在浏览器设置里面一番找之后终于找到设置这
  • 微信公众号开发(一)——开发模式接入,消息的接收与响应

    1 想自己开一个公众号 先学习一下用 Java 进行微信公众号的开发 微信公众号的管理有开发模式和编辑模式两种 两者是互斥的 腾讯是这么讲的 编辑模式 编辑模式指所有的公众号运营者都可以通过简单的编辑 设置 按关键字回复 等功能 您可以设定
  • python 打包成可执行文件

    文章目录 pyinstaller 另外一个打包工具Nuitka 常见命令选项 工具很多 只说两个 pyinstaller 网上很多人说 pyinstaller 打包慢啊 文件大啊 这那这那的 可能是我还没理解别的工具的妙用 我发现 pyin
  • Unsupported major.minor version 52.0 版本不支持问题

    摘自 https blog csdn net qq 36769100 article details 78880341 Unsupported major minor version 52 0 这个错误网上一百度一大堆 我就简单的记一下 直
  • 从github上下载下来的c++代码用vs或QTCreator运行起来(Cmake)

    初学C 从github上下载了一份源码 不知道怎么运行 特此来记录一下 源码下载下来如图所示 1 用VS运行的方法 1 文件里有CMake 需要我们有CMake工具来构建 所以第一步就是下载CMake 下载链接 Download CMake
  • 微信小程序tab切换,可滑动切换,导航栏跟随滚动实现

    微信小程序tab切换 可滑动切换 导航栏跟随滚动实现 简介 看到今日头条小程序页面可以滑动切换 而且tab导航条也会跟着滚动 点击tab导航 页面滑动 切导航栏也会跟着滚动 就想着要怎么实现这个功能 像商城类商品类目如果做成左右滑动切换类目
  • 关于:Google Chrome 官方下载地址

    1 官方在线安装版 Google Chrome 网络浏览器https www google cn intl zh CN chrome 2 官方离线安装版
  • 五大学科竞赛(二)NIOP全国青少年信息学奥林匹克分区联赛竞赛大纲

    一 初赛内容与要求 表示普及组不涉及 以下同 计 基 算 本 机 常 的 识 诞生与发展 特点 在现代社会中的应用 计算机系统的基本组成 计算机的工作原理 计算机中的数的表示 计算机信息安全基础知识 计算机网络 计 基 算 本 机 操 的
  • IDEA使用JDBC连接MySQL数据库详细教程

    文章目录 创建项目 导入驱动 让导入的驱动生效 注册数据库驱动 连接数据库 创建项目 首先需要保证你已经成功安装mysql和下载连接MySQL数据库的驱动 在IDEA里面创建一个java项目 选择创建Java项目 JDK这里选择1 8 直接
  • 二进制文件与文本文件详解

    二进制文件 定义 二进制文件就是把内存中的数据按其在内存中存储的形式原样输出到磁盘中存放 即存放的是数据的原形式 二进制文件是包含在 ASCII 及扩展 ASCII 字符中编写的数据或程序指令的文件 一般是可执行程序 图形 声音等文件 有自
  • LeetCode 4 - 寻找两个正序数组的中位数

    二分 递归 如果某个有序数组长度是奇数 那么其中位数就是中间元素 如果长度是偶数 那么中位数就是中间两个数字的平均值 假设两个有序数组的长度分别为 m 和 n 由于两个数组长度之和 m n 的奇偶不确定 为了简化代码 在合并后的数组找到第
  • Android 环信的简单使用

    最近在项目中用到了即使用讯 客户要求用环信 我擦 第一次做 坑啊 网上对这个没有特别明确的使用教程 环信的官网也不像其他的第三方有明确的使用方法 只是说了一个简单的集成 看其他人的博客感觉都说的很麻烦 很含糊 所以现在项目完成了 做个简单的
  • MySQL数据库更换数据路径

    1 路径 原路径 datadir var lib mysql socket var lib mysql mysql sock log error var log mysqld log 更换后目标路径 datadir home mysql s
  • win10 python永久换源-- 解决VSCode配置ESP IDF到最后python virtual environment 错误 问题

    运行shell 找到 python 的 Scripts 文件夹下 例如 cd F ESP VSC ESP32 ENV python env idf4 4 py3 8 env Scripts 执行它 pip config set global
  • HTML静态网页设计基础

    如何新建一个HTML文件 答 1 新建一个TXT文件 2 打开TXT文件后 输入网页基本结构 另存为 可得到 回答over 下一步 html静态网页的基本结构 以及插入图片 插入超链接 分段 换行 标题号 表格标签 标题号 h2 第二分网页
  • java和bootstrap实现行内编辑

    实现BootstrapTable单个单元格编辑后立马提交保存 批量编辑已经选中的单元格后提交保存的实现 排序有点乱了 随便记一下吧 大概就是引入这三个文件 首先引入x editable相关的js css文件
  • unity Shader实现半透明阴影

    在shader中 要对移动端的兼容 还不想实现两套分开兼容的话 pragma exclude renderers gles gles3 glcore pragma target 4 5 这两句话一定要改掉 第一行代码直接剔除了gles的渲染
  • CStdioFile扩展(支持Ansi、Unicode、Utf-8等文本格式)

    头文件声明 CStdioFileEx h StdioFileEx h interface for the CStdioFileEx class Version 1 1 23 August 2003 Incorporated fixes fr