XMLURL.cpp
上传用户:huihehuasu
上传日期:2007-01-10
资源大小:6948k
文件大小:33k
- /*
- * The Apache Software License, Version 1.1
- *
- * Copyright (c) 1999-2001 The Apache Software Foundation. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
- * "This product includes software developed by the
- * Apache Software Foundation (http://www.apache.org/)."
- * Alternately, this acknowledgment may appear in the software itself,
- * if and wherever such third-party acknowledgments normally appear.
- *
- * 4. The names "Xerces" and "Apache Software Foundation" must
- * not be used to endorse or promote products derived from this
- * software without prior written permission. For written
- * permission, please contact apache@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache",
- * nor may "Apache" appear in their name, without prior written
- * permission of the Apache Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation, and was
- * originally based on software copyright (c) 1999, International
- * Business Machines, Inc., http://www.ibm.com . For more information
- * on the Apache Software Foundation, please see
- * <http://www.apache.org/>.
- */
- /*
- * $Id: XMLURL.cpp,v 1.20 2001/10/10 23:16:46 jasons Exp $
- */
- // ---------------------------------------------------------------------------
- // Includes
- // ---------------------------------------------------------------------------
- #include <util/BinFileInputStream.hpp>
- #include <util/Janitor.hpp>
- #include <util/PlatformUtils.hpp>
- #include <util/RuntimeException.hpp>
- #include <util/TransService.hpp>
- #include <util/XMLURL.hpp>
- #include <util/XMLNetAccessor.hpp>
- #include <util/XMLString.hpp>
- #include <util/XMLUniDefs.hpp>
- #include <util/XMLUni.hpp>
- // ---------------------------------------------------------------------------
- // Local types
- //
- // TypeEntry
- // This structure defines a single entry in the list of URL types. Each
- // entry indicates the prefix for that type of URL, and the SourceTypes
- // value it maps to.
- // ---------------------------------------------------------------------------
- struct ProtoEntry
- {
- XMLURL::Protocols protocol;
- const XMLCh* prefix;
- unsigned int defPort;
- };
- // ---------------------------------------------------------------------------
- // Local data
- //
- // gXXXString
- // These are the strings for our prefix types. They all have to be
- // Unicode strings all the time, so we can't just do regular strings.
- //
- // gProtoList
- // The list of URL types that we support and some info related to each
- // one.
- //
- // gMaxProtoLen
- // The length of the longest protocol string
- //
- // NOTE:!!! Be sure to keep this up to date if new protocols are added!
- // ---------------------------------------------------------------------------
- static const XMLCh gFileString[] =
- {
- chLatin_f, chLatin_i, chLatin_l, chLatin_e, chNull
- };
- static const XMLCh gFTPString[] =
- {
- chLatin_f, chLatin_t, chLatin_p, chNull
- };
- static const XMLCh gHTTPString[] =
- {
- chLatin_h, chLatin_t, chLatin_t, chLatin_p, chNull
- };
- static ProtoEntry gProtoList[XMLURL::Protocols_Count] =
- {
- { XMLURL::File , gFileString , 0 }
- , { XMLURL::HTTP , gHTTPString , 80 }
- , { XMLURL::FTP , gFTPString , 21 }
- };
- // !!! Keep these up to date with list above!
- static const unsigned int gMaxProtoLen = 4;
- // ---------------------------------------------------------------------------
- // Local methods
- // ---------------------------------------------------------------------------
- static bool isHexDigit(const XMLCh toCheck)
- {
- if ((toCheck >= chDigit_0) && (toCheck <= chDigit_9)
- || (toCheck >= chLatin_A) && (toCheck <= chLatin_Z)
- || (toCheck >= chLatin_a) && (toCheck <= chLatin_z))
- {
- return true;
- }
- return false;
- }
- static unsigned int xlatHexDigit(const XMLCh toXlat)
- {
- if ((toXlat >= chDigit_0) && (toXlat <= chDigit_9))
- return (unsigned int)(toXlat - chDigit_0);
- if ((toXlat >= chLatin_A) && (toXlat <= chLatin_Z))
- return (unsigned int)(toXlat - chLatin_A) + 10;
- return (unsigned int)(toXlat - chLatin_a) + 10;
- }
- // ---------------------------------------------------------------------------
- // XMLURL: Public, static methods
- // ---------------------------------------------------------------------------
- XMLURL::Protocols XMLURL::lookupByName(const XMLCh* const protoName)
- {
- for (unsigned int index = 0; index < XMLURL::Protocols_Count; index++)
- {
- if (!XMLString::compareIString(gProtoList[index].prefix, protoName))
- return gProtoList[index].protocol;
- }
- return XMLURL::Unknown;
- }
- // ---------------------------------------------------------------------------
- // XMLURL: Constructors and Destructor
- // ---------------------------------------------------------------------------
- XMLURL::XMLURL() :
- fFragment(0)
- , fHost(0)
- , fPassword(0)
- , fPath(0)
- , fPortNum(0)
- , fProtocol(XMLURL::Unknown)
- , fQuery(0)
- , fUser(0)
- , fURLText(0)
- {
- }
- XMLURL::XMLURL(const XMLCh* const baseURL
- , const XMLCh* const relativeURL) :
- fFragment(0)
- , fHost(0)
- , fPassword(0)
- , fPath(0)
- , fPortNum(0)
- , fProtocol(XMLURL::Unknown)
- , fQuery(0)
- , fUser(0)
- , fURLText(0)
- {
- try
- {
- setURL(baseURL, relativeURL);
- }
- catch(...)
- {
- cleanup();
- throw;
- }
- }
- XMLURL::XMLURL(const XMLCh* const baseURL
- , const char* const relativeURL) :
- fFragment(0)
- , fHost(0)
- , fPassword(0)
- , fPath(0)
- , fPortNum(0)
- , fProtocol(XMLURL::Unknown)
- , fQuery(0)
- , fUser(0)
- , fURLText(0)
- {
- XMLCh* tmpRel = XMLString::transcode(relativeURL);
- ArrayJanitor<XMLCh> janRel(tmpRel);
- try
- {
- setURL(baseURL, tmpRel);
- }
- catch(...)
- {
- cleanup();
- throw;
- }
- }
- XMLURL::XMLURL(const XMLURL& baseURL
- , const XMLCh* const relativeURL) :
- fFragment(0)
- , fHost(0)
- , fPassword(0)
- , fPath(0)
- , fPortNum(0)
- , fProtocol(XMLURL::Unknown)
- , fQuery(0)
- , fUser(0)
- , fURLText(0)
- {
- try
- {
- setURL(baseURL, relativeURL);
- }
- catch(...)
- {
- cleanup();
- throw;
- }
- }
- XMLURL::XMLURL(const XMLURL& baseURL
- , const char* const relativeURL) :
- fFragment(0)
- , fHost(0)
- , fPassword(0)
- , fPath(0)
- , fPortNum(0)
- , fProtocol(XMLURL::Unknown)
- , fQuery(0)
- , fUser(0)
- , fURLText(0)
- {
- XMLCh* tmpRel = XMLString::transcode(relativeURL);
- ArrayJanitor<XMLCh> janRel(tmpRel);
- try
- {
- setURL(baseURL, tmpRel);
- }
- catch(...)
- {
- cleanup();
- throw;
- }
- }
- XMLURL::XMLURL(const XMLCh* const urlText) :
- fFragment(0)
- , fHost(0)
- , fPassword(0)
- , fPath(0)
- , fPortNum(0)
- , fProtocol(XMLURL::Unknown)
- , fQuery(0)
- , fUser(0)
- , fURLText(0)
- {
- try
- {
- setURL(urlText);
- }
- catch(...)
- {
- cleanup();
- throw;
- }
- }
- XMLURL::XMLURL(const char* const urlText) :
- fFragment(0)
- , fHost(0)
- , fPassword(0)
- , fPath(0)
- , fPortNum(0)
- , fProtocol(XMLURL::Unknown)
- , fQuery(0)
- , fUser(0)
- , fURLText(0)
- {
- XMLCh* tmpText = XMLString::transcode(urlText);
- ArrayJanitor<XMLCh> janRel(tmpText);
- try
- {
- setURL(tmpText);
- }
- catch(...)
- {
- cleanup();
- throw;
- }
- }
- XMLURL::XMLURL(const XMLURL& toCopy) :
- fFragment(XMLString::replicate(toCopy.fFragment))
- , fHost(XMLString::replicate(toCopy.fHost))
- , fPassword(XMLString::replicate(toCopy.fPassword))
- , fPath(XMLString::replicate(toCopy.fPath))
- , fPortNum(toCopy.fPortNum)
- , fProtocol(toCopy.fProtocol)
- , fQuery(XMLString::replicate(toCopy.fQuery))
- , fUser(XMLString::replicate(toCopy.fUser))
- , fURLText(XMLString::replicate(toCopy.fURLText))
- {
- }
- XMLURL::~XMLURL()
- {
- cleanup();
- }
- // ---------------------------------------------------------------------------
- // XMLURL: Public operators
- // ---------------------------------------------------------------------------
- XMLURL& XMLURL::operator=(const XMLURL& toAssign)
- {
- if (this == &toAssign)
- return *this;
- // Clean up our stuff
- cleanup();
- // And copy his stuff
- fFragment = XMLString::replicate(toAssign.fFragment);
- fHost = XMLString::replicate(toAssign.fHost);
- fPassword = XMLString::replicate(toAssign.fPassword);
- fPath = XMLString::replicate(toAssign.fPath);
- fProtocol = toAssign.fProtocol;
- fQuery = XMLString::replicate(toAssign.fQuery);
- fUser = XMLString::replicate(toAssign.fUser);
- fURLText = XMLString::replicate(toAssign.fURLText);
- return *this;
- }
- bool XMLURL::operator==(const XMLURL& toCompare) const
- {
- //
- // Compare the two complete URLs (which have been processed the same
- // way so they should now be the same even if they came in via different
- // relative parts.
- //
- if (XMLString::compareString(getURLText(), toCompare.getURLText()))
- return false;
- return true;
- }
- // ---------------------------------------------------------------------------
- // XMLURL: Getter methods
- // ---------------------------------------------------------------------------
- unsigned int XMLURL::getPortNum() const
- {
- //
- // If it was not provided explicitly, then lets return the default one
- // for the protocol.
- //
- if (!fPortNum)
- {
- if (fProtocol == Unknown)
- return 0;
- return gProtoList[fProtocol].defPort;
- }
- return fPortNum;
- }
- const XMLCh* XMLURL::getProtocolName() const
- {
- // Check to see if its ever been set
- if (fProtocol == Unknown)
- ThrowXML(MalformedURLException, XMLExcepts::URL_NoProtocolPresent);
- return gProtoList[fProtocol].prefix;
- }
- // ---------------------------------------------------------------------------
- // XMLURL: Setter methods
- // ---------------------------------------------------------------------------
- void XMLURL::setURL(const XMLCh* const urlText)
- {
- //
- // Try to parse the URL.
- //
- cleanup();
- parse(urlText);
- }
- void XMLURL::setURL(const XMLCh* const baseURL
- , const XMLCh* const relativeURL)
- {
- cleanup();
- // Parse our URL string
- parse(relativeURL);
- //
- // If its relative and the base is non-null and non-empty, then
- // parse the base URL string and conglomerate them.
- //
- if (isRelative() && baseURL)
- {
- if (*baseURL)
- {
- XMLURL basePart(baseURL);
- if (!conglomerateWithBase(basePart, false))
- {
- cleanup();
- ThrowXML(MalformedURLException, XMLExcepts::URL_RelativeBaseURL);
- }
- }
- }
- }
- void XMLURL::setURL(const XMLURL& baseURL
- , const XMLCh* const relativeURL)
- {
- cleanup();
- // Parse our URL string
- parse(relativeURL);
- // If its relative, then conglomerate with the base URL
- if (isRelative())
- conglomerateWithBase(baseURL);
- }
- // ---------------------------------------------------------------------------
- // XMLURL: Miscellaneous methods
- // ---------------------------------------------------------------------------
- bool XMLURL::isRelative() const
- {
- // If no protocol then relative
- if (fProtocol == Unknown)
- return true;
- // If no path, or the path is not absolute, then relative
- if (!fPath)
- return true;
- if (*fPath != chForwardSlash)
- return true;
- return false;
- }
- BinInputStream* XMLURL::makeNewStream() const
- {
- //
- // If its a local host, then we short circuit it and use our own file
- // stream support. Otherwise, we just let it fall through and let the
- // installed network access object provide a stream.
- //
- if (fProtocol == XMLURL::File)
- {
- if (!fHost || !XMLString::compareIString(fHost, XMLUni::fgLocalHostString))
- {
- //
- // We have to play a little trick here. If its really a Windows
- // style fully qualified path, we have to toss the leading /
- // character.
- //
- const XMLCh* realPath = fPath;
- if (*fPath == chForwardSlash)
- {
- if (XMLString::stringLen(fPath) > 3)
- {
- if (*(fPath + 2) == chColon)
- {
- const XMLCh chDrive = *(fPath + 1);
- if (((chDrive >= chLatin_A) && (chDrive <= chLatin_Z))
- || ((chDrive >= chLatin_a) && (chDrive <= chLatin_z)))
- {
- realPath = fPath + 1;
- }
- }
- // Similarly for UNC paths
- if ( *(fPath + 1) == *(fPath + 2) &&
- (*(fPath + 1) == chForwardSlash ||
- *(fPath + 1) == chBackSlash) )
- {
- realPath = fPath + 1;
- }
- }
- }
- BinFileInputStream* retStrm = new BinFileInputStream(realPath);
- if (!retStrm->getIsOpen())
- {
- delete retStrm;
- return 0;
- }
- return retStrm;
- }
- }
- //
- // If we don't have have an installed net accessor object, then we
- // have to just throw here.
- //
- if (!XMLPlatformUtils::fgNetAccessor)
- ThrowXML(MalformedURLException, XMLExcepts::URL_UnsupportedProto);
- // Else ask the net accessor to create the stream
- return XMLPlatformUtils::fgNetAccessor->makeNew(*this);
- }
- void XMLURL::makeRelativeTo(const XMLCh* const baseURLText)
- {
- // If this one is not relative, don't bother
- if (!isRelative())
- return;
- XMLURL baseURL(baseURLText);
- conglomerateWithBase(baseURL);
- }
- void XMLURL::makeRelativeTo(const XMLURL& baseURL)
- {
- // If this one is not relative, don't bother
- if (!isRelative())
- return;
- conglomerateWithBase(baseURL);
- }
- // ---------------------------------------------------------------------------
- // XMLURL: Private helper methods
- // ---------------------------------------------------------------------------
- //
- // This method will take the broken out parts of the URL and build up the
- // full text. We don't do this unless someone asks us to, since its often
- // never required.
- //
- void XMLURL::buildFullText()
- {
- // Calculate the worst case size of the buffer required
- unsigned int bufSize = gMaxProtoLen + 1
- + XMLString::stringLen(fFragment) + 1
- + XMLString::stringLen(fHost) + 2
- + XMLString::stringLen(fPassword) + 1
- + XMLString::stringLen(fPath)
- + XMLString::stringLen(fQuery) + 1
- + XMLString::stringLen(fUser) + 1
- + 32;
- // Clean up the existing buffer and allocate another
- delete [] fURLText;
- fURLText = new XMLCh[bufSize];
- *fURLText = 0;
- XMLCh* outPtr = fURLText;
- if (fProtocol != Unknown)
- {
- XMLString::catString(fURLText, getProtocolName());
- outPtr += XMLString::stringLen(fURLText);
- *outPtr++ = chColon;
- *outPtr++ = chForwardSlash;
- *outPtr++ = chForwardSlash;
- }
- if (fUser)
- {
- XMLString::copyString(outPtr, fUser);
- outPtr += XMLString::stringLen(fUser);
- if (fPassword)
- {
- *outPtr++ = chColon;
- XMLString::copyString(outPtr, fPassword);
- outPtr += XMLString::stringLen(fPassword);
- }
- *outPtr++ = chAt;
- }
- if (fHost)
- {
- XMLString::copyString(outPtr, fHost);
- outPtr += XMLString::stringLen(fHost);
- //
- // If the port is zero, then we don't put it in. Else we need
- // to because it was explicitly provided.
- //
- if (fPortNum)
- {
- *outPtr++ = chColon;
- XMLCh tmpBuf[16];
- XMLString::binToText(fPortNum, tmpBuf, 16, 10);
- XMLString::copyString(outPtr, tmpBuf);
- outPtr += XMLString::stringLen(tmpBuf);
- }
- }
- if (fPath)
- {
- XMLString::copyString(outPtr, fPath);
- outPtr += XMLString::stringLen(fPath);
- }
- if (fQuery)
- {
- *outPtr++ = chQuestion;
- XMLString::copyString(outPtr, fQuery);
- outPtr += XMLString::stringLen(fQuery);
- }
- if (fFragment)
- {
- *outPtr++ = chPound;
- XMLString::copyString(outPtr, fFragment);
- outPtr += XMLString::stringLen(fFragment);
- }
- // Cap it off in case the last op was not a string copy
- *outPtr = 0;
- }
- //
- // Just a central place to handle cleanup, since its done from a number
- // of different spots.
- //
- void XMLURL::cleanup()
- {
- delete [] fFragment;
- delete [] fHost;
- delete [] fPassword;
- delete [] fPath;
- delete [] fQuery;
- delete [] fUser;
- delete [] fURLText;
- fFragment = 0;
- fHost = 0;
- fPassword = 0;
- fPath = 0;
- fQuery = 0;
- fUser = 0;
- fURLText = 0;
- fProtocol = Unknown;
- fPortNum = 0;
- }
- //This function has been modified to take a bool parameter and the
- //functionality inside looks irrational but is only to make
- //solaris 2.7 CC 5.0 optimized build happy.
- bool XMLURL::conglomerateWithBase(const XMLURL& baseURL, bool useExceptions)
- {
- // The base URL cannot be relative
- if (baseURL.isRelative())
- {
- if (useExceptions)
- ThrowXML(MalformedURLException, XMLExcepts::URL_RelativeBaseURL);
- else
- return false;
- }
- //
- // Check a special case. If all we have is a fragment, then we want
- // to just take the base host and path, plus our fragment.
- //
- if ((fProtocol == Unknown)
- && !fHost
- && !fPath
- && fFragment)
- {
- // Just in case, make sure we don't leak the user or password values
- delete [] fUser;
- fUser = 0;
- delete [] fPassword;
- fPassword = 0;
- // Copy over the protocol and port number as is
- fProtocol = baseURL.fProtocol;
- fPortNum = baseURL.fPortNum;
- // Replicate the base fields that are provided
- fHost = XMLString::replicate(baseURL.fHost);
- fUser = XMLString::replicate(baseURL.fUser);
- fPassword = XMLString::replicate(baseURL.fPassword);
- fPath = XMLString::replicate(baseURL.fPath);
- return true;
- }
- //
- // All we have to do is run up through our fields and, for each one
- // that we don't have, use the based URL's. Once we hit one field
- // that we have, we stop.
- //
- if (fProtocol != Unknown)
- return true;
- fProtocol = baseURL.fProtocol;
- //
- // If the protocol is not file, and we either already have our own
- // host, or the base does not have one, then we are done.
- //
- if (fProtocol != File)
- {
- if (fHost || !baseURL.fHost)
- return true;
- }
- // Replicate all of the hosty stuff if the base has one
- if (baseURL.fHost)
- {
- // Just in case, make sure we don't leak a user or password field
- delete [] fUser;
- fUser = 0;
- delete [] fPassword;
- fPassword = 0;
- delete [] fHost;
- fHost = 0;
- fHost = XMLString::replicate(baseURL.fHost);
- fUser = XMLString::replicate(baseURL.fUser);
- fPassword = XMLString::replicate(baseURL.fPassword);
- fPortNum = baseURL.fPortNum;
- }
- // If we have a path and its absolute, then we are done
- const bool hadPath = (fPath != 0);
- if (hadPath)
- {
- if (*fPath == chForwardSlash)
- return true;
- }
- // Its a relative path, so weave them together.
- if (baseURL.fPath)
- weavePaths(baseURL.fPath);
- // If we had any original path, then we are done
- if (hadPath)
- return true;
- // We had no original path, so go on to deal with the query/fragment parts
- if (fQuery || !baseURL.fQuery)
- return true;
- fQuery = XMLString::replicate(baseURL.fQuery);
- if (fFragment || !baseURL.fFragment)
- return true;
- fFragment = XMLString::replicate(baseURL.fFragment);
- return true;
- }
- void XMLURL::parse(const XMLCh* const urlText)
- {
- // Simplify things by checking for the psycho scenarios first
- if (!*urlText)
- ThrowXML(MalformedURLException, XMLExcepts::URL_NoProtocolPresent);
- //
- // The first thing we will do is to check for a file name, so that
- // we don't waste time thinking its a URL. If its in the form x:
- // or x:/ and x is an ASCII letter, then assume that's the deal.
- //
- if (((*urlText >= chLatin_A) && (*urlText <= chLatin_Z))
- || ((*urlText >= chLatin_a) && (*urlText <= chLatin_z)))
- {
- if (*(urlText + 1) == chColon)
- {
- if ((*(urlText + 2) == chForwardSlash)
- || (*(urlText + 2) == chBackSlash))
- {
- ThrowXML(MalformedURLException, XMLExcepts::URL_NoProtocolPresent);
- }
- }
- }
- // Get a copy of the URL that we can modify
- XMLCh* srcCpy = XMLString::replicate(urlText);
- ArrayJanitor<XMLCh> janSrcCopy(srcCpy);
- //
- // Get a pointer now that we can run up thrown the source as we parse
- // bits and pieces out of it.
- //
- XMLCh* srcPtr = srcCpy;
- // Run up past any spaces
- while (*srcPtr)
- {
- if (!XMLPlatformUtils::fgTransService->isSpace(*srcPtr))
- break;
- srcPtr++;
- }
- // Make sure it wasn't all space
- if (!*srcPtr)
- ThrowXML(MalformedURLException, XMLExcepts::URL_NoProtocolPresent);
- //
- // Ok, the next thing we have to do is to find either a / or : character.
- // If the : is first, we assume we have a protocol. If the / is first,
- // then we skip to the host processing.
- //
- static const XMLCh listOne[] = { chColon, chForwardSlash, chNull };
- static const XMLCh listTwo[] = { chAt, chNull };
- static const XMLCh listThree[] = { chColon, chNull };
- static const XMLCh listFour[] = { chForwardSlash, chNull };
- static const XMLCh listFive[] = { chPound, chQuestion, chNull };
- static const XMLCh listSix[] = { chPound, chNull };
- XMLCh* ptr1 = XMLString::findAny(srcPtr, listOne);
- XMLCh* ptr2;
- // If we found a protocol, then deal with it
- if (ptr1)
- {
- if (*ptr1 == chColon)
- {
- // Cap the string at the colon
- *ptr1 = 0;
- // And try to find it in our list of protocols
- fProtocol = lookupByName(srcPtr);
- if (fProtocol == Unknown)
- {
- ThrowXML1
- (
- MalformedURLException
- , XMLExcepts::URL_UnsupportedProto1
- , srcPtr
- );
- }
- // And move our source pointer up past what we've processed
- srcPtr = (ptr1 + 1);
- }
- }
- //
- // Ok, next we need to see if we have any host part. If the next
- // two characters are //, then we need to check, else move on.
- //
- if ((*srcPtr == chForwardSlash) && (*(srcPtr + 1) == chForwardSlash))
- {
- // Move up past the slashes
- srcPtr += 2;
- //
- // If we aren't at the end of the string, then there has to be a
- // host part at this point. we will just look for the next / char
- // or end of string and make all of that the host for now.
- //
- if (*srcPtr)
- {
- // Search from here for a / character
- ptr1 = XMLString::findAny(srcPtr, listFour);
- //
- // If we found something, then the host is between where
- // we are and what we found. Else the host is the rest of
- // the content and we are done. If its empty, leave it null.
- //
- if (ptr1)
- {
- if (ptr1 != srcPtr)
- {
- delete [] fHost;
- fHost = new XMLCh[(ptr1 - srcPtr) + 1];
- ptr2 = fHost;
- while (srcPtr < ptr1)
- *ptr2++ = *srcPtr++;
- *ptr2 = 0;
- }
- }
- else
- {
- delete [] fHost;
- fHost = XMLString::replicate(srcPtr);
- // Update source pointer to the end
- srcPtr += XMLString::stringLen(fHost);
- }
- }
- }
- else
- {
- //
- // http protocol requires two forward slashes
- // we didn't get them, so throw an exception
- //
- if (fProtocol == HTTP) {
- ThrowXML1
- (
- MalformedURLException
- , XMLExcepts::URL_ExpectingTwoSlashes
- , "Found 'http' protocol"
- );
- }
- }
- //
- // If there was a host part, then we have to grovel through it for
- // all the bits and pieces it can hold.
- //
- if (fHost)
- {
- //
- // Look for a '@' character, which indicates a user name. If we
- // find one, then everything between the start of the host data
- // and the character is the user name.
- //
- ptr1 = XMLString::findAny(fHost, listTwo);
- if (ptr1)
- {
- // Get this info out as the user name
- *ptr1 = 0;
- delete [] fUser;
- fUser = XMLString::replicate(fHost);
- ptr1++;
- // And now cut these chars from the host string
- XMLString::cut(fHost, ptr1 - fHost);
- // Is there a password inside the user string?
- ptr2 = XMLString::findAny(fUser, listThree);
- if (ptr2)
- {
- // Remove it from the user name string
- *ptr2 = 0;
- // And copy out the remainder to the password field
- ptr2++;
- delete [] fPassword;
- fPassword = XMLString::replicate(ptr2);
- }
- }
- //
- // Ok, so now we are at the actual host name, if any. If we are
- // not at the end of the host data, then lets see if we have a
- // port trailing the
- //
- ptr1 = XMLString::findAny(fHost, listThree);
- if (ptr1)
- {
- // Remove it from the host name
- *ptr1 = 0;
- // Try to convert it to a numeric port value and store it
- ptr1++;
- if (!XMLString::textToBin(ptr1, fPortNum))
- ThrowXML(MalformedURLException, XMLExcepts::URL_BadPortField);
- }
- // If the host ended up empty, then toss is
- if (!*fHost)
- {
- delete[] fHost;
- fHost = 0;
- }
- }
- // If we are at the end, then we are done now
- if (!*srcPtr)
- {
- return;
- }
- //
- // Next is the path part. It can be absolute, i.e. starting with a
- // forward slash character, or relative. Its basically everything up
- // to the end of the string or to any trailing query or fragment.
- //
- ptr1 = XMLString::findAny(srcPtr, listFive);
- if (!ptr1)
- {
- delete [] fPath;
- fPath = XMLString::replicate(srcPtr);
- return;
- }
- // Everything from where we are to what we found is the path
- if (ptr1 > srcPtr)
- {
- delete [] fPath;
- fPath = new XMLCh[(ptr1 - srcPtr) + 1];
- ptr2 = fPath;
- while (srcPtr < ptr1)
- *ptr2++ = *srcPtr++;
- *ptr2 = 0;
- }
- //
- // If we found a fragment, then it is the rest of the string and we
- // are done.
- //
- if (*srcPtr == chPound)
- {
- srcPtr++;
- delete [] fFragment;
- fFragment = XMLString::replicate(srcPtr);
- return;
- }
- //
- // The query is either the rest of the string, or up to the fragment
- // separator.
- //
- srcPtr++;
- ptr1 = XMLString::findAny(srcPtr, listSix);
- delete [] fQuery;
- if (!ptr1)
- {
- fQuery = XMLString::replicate(srcPtr);
- return;
- }
- else
- {
- fQuery = new XMLCh[(ptr1 - srcPtr) + 1];
- ptr2 = fQuery;
- while (srcPtr < ptr1)
- *ptr2++ = *srcPtr++;
- *ptr2 = 0;
- }
- // If we are not at the end now, then everything else is the fragment
- if (*srcPtr == chPound)
- {
- srcPtr++;
- delete [] fFragment;
- fFragment = XMLString::replicate(srcPtr);
- }
- }
- void XMLURL::weavePaths(const XMLCh* const basePart)
- {
- // Watch for stupid stuff
- if (!basePart)
- return;
- if (!*basePart)
- return;
- //
- // Ok, lets start at the end of the base path and work backwards and
- // our path part and work forwards. For each leading . we see, we just
- // eat it. For each leading .. we see, we eat it and throw away one
- // level in the source URL.
- //
- // If the last character in the base part is a forward slash, back
- // up one first before we look for the last slash.
- //
- const XMLCh* basePtr = basePart + (XMLString::stringLen(basePart) - 1);
- if (*basePtr == chForwardSlash)
- basePtr--;
- while ((basePtr >= basePart)
- && ((*basePtr != chForwardSlash) && (*basePtr != chBackSlash)))
- {
- basePtr--;
- }
- if (basePtr < basePart)
- return;
- // Create a buffer as large as both parts
- XMLCh* tmpBuf = new XMLCh[XMLString::stringLen(fPath)
- + XMLString::stringLen(basePart)
- + 2];
- //
- // If we have no path part, then copy the base part up to the
- // base pointer
- //
- if (!fPath)
- {
- XMLCh* bufPtr = tmpBuf;
- const XMLCh* tmpPtr = basePart;
- while (tmpPtr <= basePtr)
- *bufPtr++ = *tmpPtr++;
- *bufPtr = 0;
- fPath = tmpBuf;
- return;
- }
- // After this, make sure the buffer gets handled if we exit early
- ArrayJanitor<XMLCh> janBuf(tmpBuf);
- //
- // We have some path part, so we need to check to see if we ahve to
- // weave any of the parts together.
- //
- XMLCh* pathPtr = fPath;
- while (true)
- {
- // If it does not start with some period, then we are done
- if (*pathPtr != chPeriod)
- break;
- unsigned int periodCount = 1;
- pathPtr++;
- if (*pathPtr == chPeriod)
- {
- pathPtr++;
- periodCount++;
- }
- // Has to be followed by a / or or the null to mean anything
- if ((*pathPtr != chForwardSlash) && (*pathPtr != chBackSlash)
- && *pathPtr)
- {
- break;
- }
- if (*pathPtr)
- pathPtr++;
- // If its one period, just eat it, else move backwards in the base
- if (periodCount == 2)
- {
- basePtr--;
- while ((basePtr >= basePart)
- && ((*basePtr != chForwardSlash) && (*basePtr != chBackSlash)))
- {
- basePtr--;
- }
- // There are not enough levels to handle all the .. parts
- if (basePtr < basePart)
- ThrowXML(MalformedURLException, XMLExcepts::URL_BaseUnderflow);
- }
- }
- // Copy the base part up to the base pointer
- XMLCh* bufPtr = tmpBuf;
- const XMLCh* tmpPtr = basePart;
- while (tmpPtr <= basePtr)
- *bufPtr++ = *tmpPtr++;
- // And then copy on the rest of our path
- XMLString::copyString(bufPtr, pathPtr);
- // Now delete our path and make the new buffer our path
- delete [] fPath;
- janBuf.orphan();
- fPath = tmpBuf;
- }