/*
  Copyright (C) 2000-2005 SKYRIX Software AG

  This file is part of SOPE.

  SOPE is free software; you can redistribute it and/or modify it under
  the terms of the GNU Lesser General Public License as published by the
  Free Software Foundation; either version 2, or (at your option) any
  later version.

  SOPE is distributed in the hope that it will be useful, but WITHOUT ANY
  WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
  License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with SOPE; see the file COPYING.  If not, write to the
  Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  02111-1307, USA.
*/

#include "NSString+misc.h"
#include "common.h"

/*
  TODO: support new Panther API?:
- (NSString *)stringByAddingPercentEscapesUsingEncoding:(NSStringEncoding)e
- (NSString *)stringByReplacingPercentEscapesUsingEncoding:(NSStringEncoding)e
*/

@implementation NSString(URLEscaping)

/* Cached value of the NGUseUTF8AsURLEncoding default; -1 means "not read yet". */
static int useUTF8Encoding = -1;

/*
  Returns YES if the "NGUseUTF8AsURLEncoding" user default is set.

  The default is read on the first call only and the result is cached in
  useUTF8Encoding; a note is logged once when the default turns out to be on.
*/
static inline BOOL doUseUTF8Encoding(void) {
  NSUserDefaults *defaults;

  /* fast path: the default was already looked up */
  if (useUTF8Encoding != -1)
    return useUTF8Encoding ? YES : NO;

  defaults = [NSUserDefaults standardUserDefaults];
  if (![defaults boolForKey:@"NGUseUTF8AsURLEncoding"]) {
    useUTF8Encoding = 0;
    return NO;
  }

  useUTF8Encoding = 1;
  NSLog(@"Note: Using UTF-8 as URL encoding in NGExtensions.");
  return YES;
}

/* Returns YES if _c is an ASCII letter ('a'-'z' or 'A'-'Z'). */
static inline BOOL isUrlAlpha(unsigned char _c) {
  if ((_c >= 'a') && (_c <= 'z'))
    return YES;
  if ((_c >= 'A') && (_c <= 'Z'))
    return YES;
  return NO;
}
/* Returns YES if _c is an ASCII decimal digit ('0'-'9'). */
static inline BOOL isUrlDigit(unsigned char _c) {
  if (_c < '0')
    return NO;
  if (_c > '9')
    return NO;
  return YES;
}
/*
  Returns YES for the punctuation this file treats as "safe" in a URL:
  '$', '-', '_', '.' and '@'.

  '+' is deliberately NOT in the set (see OGo bug #1260, required for forms).
*/
static inline BOOL isUrlSafeChar(unsigned char _c) {
  if ((_c == '$') || (_c == '-') || (_c == '_') || (_c == '.'))
    return YES;
  if (_c == '@') // TODO: not a safe char?!
    return YES;
  return NO;
}
/*
  Returns YES if _c is one of the "extra" URL characters:
  '!', '*', '"', '\'', '|' or ','.
*/
static inline BOOL isUrlExtraChar(unsigned char _c) {
  if ((_c == '!') || (_c == '*') || (_c == '"'))
    return YES;
  if ((_c == '\'') || (_c == '|') || (_c == ','))
    return YES;
  return NO;
}
/* Returns YES if _c is '%', the character that introduces a URL escape. */
static inline BOOL isUrlEscapeChar(unsigned char _c) {
  if (_c == '%')
    return YES;
  return NO;
}
/*
  Returns YES if _c is one of the characters this file lists as reserved:
  '=', ';', '/', '#', '?', ':' or ' '.
*/
static inline BOOL isUrlReservedChar(unsigned char _c) {
  if ((_c == '=') || (_c == ';') || (_c == '/'))
    return YES;
  if ((_c == '#') || (_c == '?') || (_c == ':'))
    return YES;
  return (_c == ' ') ? YES : NO;
}

/*
  Returns YES if _c is a letter, a digit, a "safe" char, an "extra" char or
  the escape char '%' (as classified by the helpers above).
*/
static inline BOOL isUrlXalpha(unsigned char _c) {
  BOOL matches;

  matches = isUrlAlpha(_c)
    || isUrlDigit(_c)
    || isUrlSafeChar(_c)
    || isUrlExtraChar(_c)
    || isUrlEscapeChar(_c);
  return matches ? YES : NO;
}

/* Returns YES if _c is a hexadecimal digit: '0'-'9', 'a'-'f' or 'A'-'F'. */
static inline BOOL isUrlHexChar(unsigned char _c) {
  BOOL lowerHex = ((_c >= 'a') && (_c <= 'f')) ? YES : NO;
  BOOL upperHex = ((_c >= 'A') && (_c <= 'F')) ? YES : NO;

  return (isUrlDigit(_c) || lowerHex || upperHex) ? YES : NO;
}

/* Returns YES if _c is an ASCII letter or an ASCII decimal digit. */
static inline BOOL isUrlAlphaNum(unsigned char _c) {
  if (isUrlAlpha(_c))
    return YES;
  if (isUrlDigit(_c))
    return YES;
  return NO;
}

/*
  Returns YES if the byte _c must be written as a %XX escape, that is if it
  is neither alphanumeric, nor '_', nor one of the "safe" characters.
*/
static inline BOOL isToBeEscaped(unsigned char _c) {
  if (isUrlAlphaNum(_c))
    return NO;
  if (_c == '_')
    return NO;
  if (isUrlSafeChar(_c))
    return NO;
  return YES;
}

/*
  URL-escapes srclen bytes from _source into _dest and NUL-terminates _dest.

  Bytes for which isToBeEscaped() is false are copied unchanged, every other
  byte is written as '%' followed by two uppercase hex digits. A space is
  escaped like any other byte (it is not turned into '+').

  _dest must provide room for up to 3 * srclen + 1 bytes.
*/
static void
NGEscapeUrlBuffer(const unsigned char *_source, unsigned char *_dest,
                  unsigned srclen)
{
  static const char hexDigits[] = "0123456789ABCDEF";
  unsigned char *out = _dest;
  unsigned      pos;

  for (pos = 0; pos < srclen; pos++) {
    unsigned char byte = _source[pos];

    if (isToBeEscaped(byte)) {
      /* %XX, high nibble first */
      *out++ = '%';
      *out++ = (unsigned char)hexDigits[(byte >> 4) & 0x0F];
      *out++ = (unsigned char)hexDigits[byte & 0x0F];
    }
    else
      *out++ = byte;
  }
  *out = '\0';
}

/*
  Returns the numeric value (0-15) of the hexadecimal digit _c, accepting
  both upper- and lowercase letters, or -1 if _c is not a hex digit.
*/
static inline int _valueOfHexChar(register unichar _c) {
  if ((_c >= '0') && (_c <= '9'))
    return (_c - '0');      // '0'..'9' => 0..9
  if ((_c >= 'A') && (_c <= 'F'))
    return (_c - 'A') + 10; // 'A'..'F' => 10..15
  if ((_c >= 'a') && (_c <= 'f'))
    return (_c - 'a') + 10; // 'a'..'f' => 10..15
  return -1;
}
/* Returns YES if _c is a hexadecimal digit ('0'-'9', 'A'-'F' or 'a'-'f'). */
static inline BOOL _isHexDigit(register unichar _c) {
  if ((_c >= '0') && (_c <= '9'))
    return YES;
  if ((_c >= 'A') && (_c <= 'F'))
    return YES;
  if ((_c >= 'a') && (_c <= 'f'))
    return YES;
  return NO;
}

/*
  Decodes the %-escapes of the NUL-terminated buffer _source into _dest and
  NUL-terminates _dest.

  Rules:
    "%XY" (X, Y hex digits)  => the single byte 0xXY
    "%c"  (c not a hex digit) => c, eg "%%" -> "%"
    "%"   at the very end     => "%"
    "%X"  not followed by a second hex digit (including "%X" at the end of
          the input) => copied literally as "%X"; the following character is
          then processed normally
    any other character is copied unchanged ('+' is NOT turned into ' ').

  The output is never longer than the input, so _dest needs room for
  strlen(_source) + 1 bytes.

  The second hex digit is validated before it is consumed. Without that
  check a truncated escape such as a trailing "%A" would step over the
  terminating NUL and keep reading (and writing) beyond both buffers.

  Note: "%00" decodes to a NUL byte, which ends the result for any caller
  that measures _dest with strlen().
*/
static void
NGUnescapeUrlBuffer(const unsigned char *_source, unsigned char *_dest)
{
  while (*_source != '\0') {
    unsigned char c = *_source;

    //if (c == '+') // '+' stands for a space
    //  *_dest = ' ';
    if (c != '%') { // char passed through
      *_dest++ = c;
      _source++;
      continue;
    }

    /* c is '%': inspect the character following it */
    c = _source[1];

    if (c == '\0') { // lonely '%' at the end of the input
      *_dest++ = '%';
      break;
    }

    if (!_isHexDigit(c)) { // escaped char, like '%%' -> '%'
      *_dest++ = c;
      _source += 2;
      continue;
    }

    if (_isHexDigit(_source[2])) { // hex-escaped char, like '%F3'
      int decChar = _valueOfHexChar(c) * 16 + _valueOfHexChar(_source[2]);

      *_dest++ = (unsigned char)decChar;
      _source += 3;
      continue;
    }

    /*
      Malformed escape: one hex digit only. Keep "%X" as is and leave the
      next character (possibly the terminating NUL) to the loop.
    */
    *_dest++ = '%';
    *_dest++ = c;
    _source += 2;
  }
  *_dest = '\0';
}

/* Returns YES if the receiver contains at least one '%' character. */
- (BOOL)containsURLEscapeCharacters {
  SEL      charSel = @selector(characterAtIndex:);
  unichar  (*fetchChar)(id, SEL, unsigned);
  unsigned pos, count;

  count = [self length];
  if (count == 0)
    return NO;

  /* look up the implementation once instead of messaging per character */
  fetchChar = (void *)[self methodForSelector:charSel];

  pos = 0;
  while (pos < count) {
    if (fetchChar(self, charSel, pos) == '%')
      return YES;
    pos++;
  }
  return NO;
}
/*
  Returns YES if the receiver contains at least one character which must be
  escaped before the string can be used in a URL.

  Every character outside the 7-bit ASCII range counts as invalid. ASCII
  characters are classified by isToBeEscaped(). The range check has to come
  first: isToBeEscaped() takes an unsigned char, so passing a unichar would
  silently drop its upper byte and eg U+0161 would be judged as 'a' (0x61)
  and wrongly be reported as valid.
*/
- (BOOL)containsURLInvalidCharacters {
  register unsigned i, len;
  register unichar (*charAtIdx)(id,SEL,unsigned);
  
  if ((len = [self length]) == 0) return NO;
  
  charAtIdx = (void*)[self methodForSelector:@selector(characterAtIndex:)];
  for (i = 0; i < len; i++) {
    register unichar c;
    
    c = charAtIdx(self, @selector(characterAtIndex:), i);
    if (c > 127) /* non-ASCII always needs to be escaped */
      return YES;
    if (isToBeEscaped((unsigned char)c))
      return YES;
  }
  return NO;
}

- (NSString *)stringByUnescapingURL {
  /* 
     Input is a URL string - per definition ASCII(?!), like "hello%98%88.txt"
     output is a unicode string (never longer than the input)
     
     Note that the input itself is in some encoding! That is, the input is
     turned into a buffer eg containing UTF-8 and needs to be converted into
     a unicode string.
     
     Returns an autoreleased string. A receiver without any '%' is returned
     as a copy. The decoded bytes are interpreted as UTF-8 if the
     NGUseUTF8AsURLEncoding default is set, otherwise as a C string in the
     default encoding.
     
     Returns nil if a work buffer cannot be allocated; the result is also
     whatever the NSString initializer returns for the decoded bytes, which
     may be nil if it rejects them (eg malformed UTF-8 - TODO confirm for
     the Foundation in use).
  */
  unsigned len;
  char     *cstr;
  char     *buffer;
  NSString *s;
  
  if (![self containsURLEscapeCharacters]) /* scan for '%' */
    return [[self copy] autorelease];
  
  if ((len = [self cStringLength]) == 0) return @"";
  
  if ((cstr = malloc(len + 10)) == NULL)
    return nil;
  [self getCString:cstr]; /* this is OK, a URL is always in ASCII! */
  cstr[len] = '\0';
  
  if ((buffer = malloc(len + 4)) == NULL) {
    free(cstr);
    return nil;
  }
  NGUnescapeUrlBuffer((unsigned char *)cstr, (unsigned char *)buffer);
  
  /* the escaped input is not needed anymore once it got decoded */
  free(cstr); cstr = NULL;
  
  if (doUseUTF8Encoding()) {
    /* OK, the input is considered UTF-8 encoded in a string */
    s = [[NSString alloc] initWithUTF8String:buffer];
    free(buffer); buffer = NULL;
  }
  else {
    /* the string takes over ownership of buffer (freeWhenDone) */
    s = [[NSString alloc]
	  initWithCStringNoCopy:buffer
	  length:strlen(buffer)
	  freeWhenDone:YES];
  }
  return [s autorelease];
}

/*
  Returns an autoreleased, URL-escaped variant of the receiver.

  An empty receiver yields @"", a receiver without characters that need
  escaping is returned as a copy. Otherwise the string is turned into bytes
  (UTF-8 if the NGUseUTF8AsURLEncoding default is set, the default C string
  encoding otherwise) and every byte which is not URL safe is written as
  %XX.

  Returns nil if the receiver cannot be converted to UTF-8 data or if a work
  buffer cannot be allocated.
*/
- (NSString *)stringByEscapingURL {
  unsigned len;
  NSString *s;
  char     *buffer = NULL;
  
  if ((len = [self length]) == 0) return @"";
  
  if (![self containsURLInvalidCharacters]) // needs to be escaped ?
    return [[self copy] autorelease];
  
  if (doUseUTF8Encoding()) {
    // steps:
    // a) encode into a data buffer! (eg UTF8 or ISO)
    // b) encode that buffer into URL encoding
    // c) create an ASCII string from that
    NSData *data;
    
    if ((data = [self dataUsingEncoding:NSUTF8StringEncoding]) == nil)
      return nil;
    if ((len = [data length]) == 0)
      return @"";
    
    /* worst case: each byte becomes "%XX", plus the terminating NUL */
    if ((buffer = malloc(len * 3 + 2)) == NULL)
      return nil;
    NGEscapeUrlBuffer([data bytes], (unsigned char *)buffer, len);
  }
  else {
    unsigned char *cstr;
    
    len = [self cStringLength];
    if ((cstr = malloc(len + 4)) == NULL)
      return nil;
    [self getCString:(char *)cstr]; // Unicode!
    cstr[len] = '\0';
    
    /* worst case: each byte becomes "%XX", plus the terminating NUL */
    if ((buffer = malloc(len * 3 + 2)) == NULL) {
      free(cstr);
      return nil;
    }
    NGEscapeUrlBuffer(cstr, (unsigned char *)buffer, len);
    free(cstr);
  }
  /* the following assumes that the default-encoding is ASCII compatible */
  /* the string takes over ownership of buffer (freeWhenDone) */
  s = [[NSString alloc]
	         initWithCStringNoCopy:buffer
	         length:strlen(buffer)
	         freeWhenDone:YES];
  return [s autorelease];
}

@end /* NSString(URLEscaping) */