/*
 * utf8fmt Copyright (c) 2024, James Bailie.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *     * The name of James Bailie may not be used to endorse or promote
 * products derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include<stdlib.h>
#include<stdio.h>
#include<unistd.h>
#include<errno.h>
#include<limits.h>
#include<string.h>
#include<ctype.h>

#include "string.h"

/*
 * Reads the next whitespace-delimited word from stdin.
 *
 * The word is stored in a single buffer that is created on the first call
 * and cleared and reused on every later call, so the returned pointer is
 * only valid until get_token() is called again.
 *
 * While skipping the whitespace that precedes the word, newlines are echoed
 * to stdout in two situations:
 *
 *    - first is nonzero: every newline read is echoed;
 *    - first is zero: newlines are counted (the count starts at 1 if the
 *      previous word was terminated by a newline) and each newline from the
 *      second one onward is echoed.
 *
 * Whenever a newline is echoed and *length is nonzero, one additional
 * newline is written and *length is reset to 0.
 *
 * Returns the word, or NULL when EOF is reached before any word character
 * is read.  Exits with status 1 if the buffer cannot be created or grown.
 */
struct string *get_token( int *length, int first )
{
   static struct string *token = NULL;
   static int nl = 0;
   int c;

   if ( token == NULL && ( token = string_make( NULL )) == NULL )
   {
      fprintf( stderr, "get_token(): string_make() failed: %s\n", strerror( errno ));
      exit( 1 );
   }

   string_clear( token );

   /* Skip leading whitespace, echoing newlines as described above. */
   while(( c = fgetc( stdin )) != EOF )
   {
      if ( c == '\n' && ( first || ++nl >= 2 ))
      {
         fputc( '\n', stdout );

         if ( *length )
         {
            fputc( '\n', stdout );
            *length = 0;
         }
      }

      if ( ! isspace( c ))
         break;
   }

   if ( c == EOF )
      return NULL;

   /* A word has started: begin a fresh newline count. */
   nl = 0;

   /* c already holds the first character of the word. */
   do
   {
      if ( isspace( c ))
      {
         /* Remember a terminating newline so the next call can count it. */
         if ( c == '\n' )
            ++nl;

         break;
      }

      if ( string_append( token, c ))
      {
         fprintf( stderr, "get_token(): string_append() failed: %s\n", strerror( errno ));
         exit( 1 );
      }
   }
   while(( c = fgetc( stdin )) != EOF );

   return ( token->used ? token : NULL );
}

/*
 *  Unicode for single typographical close quote: U+2019 = 0010 0000 0001 1001
 *  UTF-8: 1110 0010 | 1000 0000 | 1001 1001 = 226 | 128 | 153
 *
 *  Unicode for double typographical close quote: U+201D = 0010 0000 0001 1101
 *  UTF-8: 1110 0010 | 1000 0000 | 1001 1101 = 226 | 128 | 157
 *
 *  Unicode for close guillemet: U+00BB = 0000 0000 1011 1011
 *  UTF-8: 1100 0010 | 1011 1011 = 194 | 187
 */

/*
 * Decides whether word ends a sentence.
 *
 * The deciding character is the last byte of the word, or the byte just
 * before one trailing closing quote when the word ends in:
 *
 *    - an ASCII double or single quote;
 *    - the two-byte sequence 194 187;
 *    - the three-byte sequence 226 128 153 or 226 128 157.
 *
 * Returns 1 when the deciding character is '?' or '!', or is '.' and the
 * word is not treated as an abbreviation; returns 0 otherwise, including
 * for an empty word.
 *
 * abbrevs: when nonzero, a word that is exactly one of the titles listed in
 * abbr[] is not a sentence end even though it finishes with '.'.
 *
 * word->str is compared with strcmp(), so it must be NUL-terminated.
 */
int is_sentence_end( struct string *word, int abbrevs )
{
   static const char *const abbr[] = { "Mr.", "Ms.", "Mrs.", "M.", "MM.", "Mme.", "Mmes.", "Mlle.", "Mlles.", NULL };
   const char *const *aptr;
   unsigned char last;

   /* An empty word has no final byte to inspect. */
   if ( word->used < 1 )
      return 0;

   last = word->str[ word->used - 1 ];

   /* Look through one trailing closing quote, if there is room for one. */
   if ( word->used > 1 )
   {
      if ( last == '"' || last == '\'' )
         last = word->str[ word->used - 2 ];
      else if ( word->used > 2 )
      {
         if ( last == 187 && ( unsigned char )word->str[ word->used - 2 ] == 194 )
            last = word->str[ word->used - 3 ];
         else if ( word->used > 3 && ( last == 153 || last == 157 ))
         {
            if (( unsigned char )word->str[ word->used - 2 ] == 128 && ( unsigned char )word->str[ word->used - 3 ] == 226 )
               last = word->str[ word->used - 4 ];
         }
      }
   }

   if ( last == '?' || last == '!' )
      return 1;

   if ( last != '.' )
      return 0;

   /* The whole word must match a title; a title followed by a quote does not. */
   if ( abbrevs )
   {
      for( aptr = abbr; *aptr != NULL; ++aptr )
      {
         if ( strcmp( word->str, *aptr ) == 0 )
            return 0;
      }
   }

   return 1;
}

/*
 * Copies the words returned by get_token() to stdout, separated by spaces
 * and broken into lines.
 *
 * limit:   widest line allowed, in the units returned by string_UTF8_len().
 *          A word that would push the current line past the limit starts a
 *          new line instead.
 * spaces:  when nonzero, a word for which is_sentence_end() returns nonzero
 *          is followed by two spaces instead of one.
 * flow:    when nonzero, the limit is ignored and lines are never broken
 *          here.
 * abbrevs: passed through to is_sentence_end().
 *
 * get_token() receives the address of the current column and may reset it
 * to 0 itself.  A final newline is written if the last line is not empty.
 */
void wrap( int limit, int spaces, int flow, int abbrevs )
{
   struct string *word;
   int column = 0, wide_gap = 0, first = 1;

   while(( word = get_token( &column, first )) != NULL )
   {
      int width, needed, fits;

      first = 0;
      width = string_UTF8_len( word->str );

      /* Column reached if this word is appended to the current line. */
      needed = column + width;

      if ( column )
         needed += 1 + wide_gap;

      fits = ( flow || needed <= limit );

      /* A separator is only needed when something is already on the line. */
      if ( column )
      {
         if ( ! fits )
            fputc( '\n', stdout );
         else
         {
            fputc( ' ', stdout );

            if ( wide_gap )
               fputc( ' ', stdout );
         }
      }

      fputs( word->str, stdout );

      if ( spaces )
         wide_gap = is_sentence_end( word, abbrevs );

      column = ( fits ? needed : string_UTF8_len( word->str ));
   }

   if ( column )
      fputc( '\n', stdout );
}

/*
 * Makes stdin read from the file named infile.
 *
 * A NULL name, or a name consisting of the single character '-', leaves
 * stdin as it is.  If the file cannot be opened, a diagnostic is written to
 * stderr and the program exits with status 1.
 */
void redirect_stdin( char *infile )
{
   if ( infile == NULL || ( *infile == '-' && ! infile[ 1 ] ))
      return;

   /*
    * freopen() rebinds the existing stream.  Assigning a new FILE pointer to
    * stdin is not portable because stdin need not be a modifiable lvalue.
    */
   if ( freopen( infile, "r", stdin ) == NULL )
   {
      fprintf( stderr, "redirect_stdin(): freopen( %s, \"r\" ): %s\n", infile, strerror( errno ));
      exit( 1 );
   }
}

/*
 * Makes stdout write to the file named outfile.
 *
 * A NULL name leaves stdout as it is.  Any existing file of that name is
 * unlinked first, so a new file is created instead of the old one being
 * truncated in place; failure of the unlink is ignored.  If the file
 * cannot be opened, a diagnostic is written to stderr and the program exits
 * with status 1.
 */
void redirect_stdout( char *outfile )
{
   if ( outfile == NULL )
      return;

   unlink( outfile );

   /*
    * freopen() rebinds the existing stream.  Assigning a new FILE pointer to
    * stdout is not portable because stdout need not be a modifiable lvalue.
    */
   if ( freopen( outfile, "w", stdout ) == NULL )
   {
      fprintf( stderr, "redirect_stdout(): freopen( %s, \"w\" ): %s\n", outfile, strerror( errno ));
      exit( 1 );
   }
}

/*
 * Entry point: utf8fmt [-a] [-f] [-s] [-u] [-l length] [infile [outfile]]
 *
 *    -a         do not treat the titles known to is_sentence_end() as
 *               sentence ends
 *    -f         do not break lines at the length limit
 *    -s         put two spaces after a sentence end
 *    -u         accepted, but has no effect in this file
 *    -l length  line length limit; defaults to 79 and must be positive
 *
 * The first operand, if present, replaces stdin and the second replaces
 * stdout.  Exits with status 1 on an invalid option or length.
 */
int main( int argc, char **argv )
{
   int opt, length = 79, spaces = 0, flow = 0, abbrevs = 0;
   long value;

   while(( opt = getopt( argc, argv, "afsul:" )) != -1 )
   {
      switch( opt )
      {
         case 'a':
            ++abbrevs;
            break;

         case 'f':
            ++flow;
            break;

         case 'l':
            /* Reject values that would not survive the narrowing to int. */
            errno = 0;
            value = strtol( optarg, NULL, 10 );

            if ( errno == ERANGE || value > INT_MAX || value < INT_MIN )
            {
               fprintf( stderr, "-l value is out of range: %s\n", optarg );
               exit( 1 );
            }

            length = ( int )value;
            break;

         case 's':
            ++spaces;
            break;

         case 'u':
            break;

         default:
            /* getopt() has already reported the problem on stderr. */
            exit( 1 );
      }
   }

   if ( length <= 0 )
   {
      fprintf( stderr, "-l value is <= 0: %d\n", length );
      exit( 1 );
   }

   /*
    * optind is the index of the first operand however the options were
    * written: clustered ( -as ), with an attached argument ( -l60 ) or
    * terminated by "--".
    */
   argc -= optind;
   argv += optind;

   if ( argc > 0 )
      redirect_stdin( argv[ 0 ] );

   if ( argc > 1 )
      redirect_stdout( argv[ 1 ] );

   wrap( length, spaces, flow, abbrevs );

   return 0;
}
