From d0180007366dd9e1e4b1e7aec249998135fbaa2d Mon Sep 17 00:00:00 2001
From: Jan Nieuwenhuizen <janneke@gnu.org>
Date: Fri, 6 Mar 2009 11:31:28 +0100
Subject: [PATCH] 2009-01-29  Jan Nieuwenhuizen  <janneke@gnu.org>

	* src/yelp-info-parser.c (info_body_text): Also substitute
	*Note references, which makes the previous special-case code obsolete:
	(info_process_text_notes): Remove.
	(parse_tree_level):
	(yelp_info_parse_menu): Update callers.
---
 ChangeLog              |    8 +
 src/yelp-info-parser.c |  347 +++++++++++++-----------------------------------
 2 files changed, 100 insertions(+), 255 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index c2061e8..9078892 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2009-01-29  Jan Nieuwenhuizen  <janneke@gnu.org>
+
+	* src/yelp-info-parser.c (info_body_text): Also substitute
+	Notes, greatly simplifying previous:
+	(info_process_text_notes): Remove.
+	(parse_tree_level):
+	(yelp_info_parse_menu): Update callers.
+
 2009-01-18  Jan Nieuwenhuizen  <janneke@gnu.org>
 
 	* src/yelp-info-parser.c (info_image_get_attributes):
diff --git a/src/yelp-info-parser.c b/src/yelp-info-parser.c
index c5d38ca..8b623e3 100644
--- a/src/yelp-info-parser.c
+++ b/src/yelp-info-parser.c
@@ -42,8 +42,7 @@ GtkTreeIter *         find_real_sibling                  (GtkTreeModel *model,
 							  GtkTreeIter *comp);
 xmlNodePtr            yelp_info_parse_menu               (GtkTreeStore *tree,
 							  xmlNodePtr *node,
-							  gchar *page_content,
-							  gboolean notes);
+							  gchar *page_content);
 gboolean              get_menuoptions                    (gchar *line, 
 							  gchar **title, 
 							  gchar **ref, 
@@ -56,9 +55,6 @@ gboolean              resolve_frag_id                    (GtkTreeModel *model,
 void                  fix_tag_table                      (gchar *offset, 
 							  gpointer page, 
 							  TagTableFix *a);
-void   		      info_process_text_notes            (xmlNodePtr *node, 
-							  gchar *content,
-							  GtkTreeStore *tree);
 
 
 static GHashTable *
@@ -141,39 +137,108 @@ info_insert_image (xmlNodePtr parent, GMatchInfo *match_info)
   return parent;
 }
 
+/* Append an <a> child to PARENT for the *Note reference captured in MATCH_INFO (groups: 1 link
+   text, 2 info file, 3 node; an unmatched optional group is fetched as "", not NULL).  Returns PARENT.  */
+static xmlNodePtr
+info_insert_note (xmlNodePtr parent, GMatchInfo *match_info)
+{
+  gchar *name = g_match_info_fetch (match_info, 1);
+  gchar *file = g_match_info_fetch (match_info, 2);
+  gchar *node = g_match_info_fetch (match_info, 3);
+  const gchar *target = (node && *node) ? node : "Top";
+  gchar *href = g_strconcat ("info:", (file ? file : ""), "#", target, NULL);
+  xmlNodePtr a = xmlNewTextChild (parent, NULL, BAD_CAST "a", BAD_CAST ((name && *name) ? name : target));
+  xmlNewProp (a, BAD_CAST "href", BAD_CAST href);
+  g_free (name); g_free (file); g_free (node); g_free (href);
+  return parent;
+}
+
+/* Match STRING against REGEX and drop the caller's reference to REGEX; NULL if REGEX is NULL.  */
+static GMatchInfo*
+regex_string_get_match_info (GRegex* regex, gchar const* string)
+{
+  GMatchInfo *match_info = NULL;
+  if (regex) { g_regex_match (regex, string, 0, &match_info); g_regex_unref (regex); }
+  return match_info;
+}
+		      
+/* Return the match info for the info image tags in STRING (caller frees it).  */
+static GMatchInfo *string_get_image_match_info (gchar const *string)
+{
+  return regex_string_get_match_info (g_regex_new ("(" INFO_C_IMAGE_TAG_OPEN_RE "((?:[^" INFO_TAG_1 "]|[^" INFO_C_TAG_0 "]+" INFO_TAG_1 ")*)" INFO_C_TAG_CLOSE_RE ")", 0, 0, NULL), string);
+}
+
+/* Match "*Note [TEXT:] [(FILE)] NODE[::]" in STRING; FILE is matched lazily so a trailing ".info" is left out.  */
+static GMatchInfo *string_get_note_match_info (gchar const *string)
+{
+  return regex_string_get_match_info (g_regex_new ("[*][Nn]ote\\s(?:([^:]+):)?\\s*(?:\\(([^:()]+?)(?:(?:(?:[.]info)\\))|\\)))?\\s*([^():.,]+)(?:::)?", 0, 0, NULL), string);
+}
+
 /*
   Convert body text CONTENT to xml nodes, processing info image tags
-  when found.  IWBN add a regex match for *Note: here and call the
-  *Note ==> <a href> logic of info_process_text_notes from here.
+  and *Note references when found (then "para1" pieces are appended and NULL is returned).
  */
 static xmlNodePtr
-info_body_text (xmlNodePtr parent, xmlNsPtr ns, gchar const *name, gchar const *content)
+info_body_text (xmlNodePtr parent, gchar const *name, gchar *content)
 {
-  if (!strstr (content, INFO_C_IMAGE_TAG_OPEN))
-    return xmlNewTextChild (parent, ns, BAD_CAST name, BAD_CAST content);
+  if (!strstr (content, INFO_C_IMAGE_TAG_OPEN)
+      && !strstr (content, "*Note") /* any whitespace may follow, as in the note regex */
+      && !strstr (content, "*note"))
+    return xmlNewTextChild (parent, NULL, BAD_CAST name, BAD_CAST content);
 
-  gint content_len = strlen (content);
   gint pos = 0;
-  GRegex *regex = g_regex_new ("(" INFO_C_IMAGE_TAG_OPEN_RE "((?:[^" INFO_TAG_1 "]|[^" INFO_C_TAG_0 "]+" INFO_TAG_1 ")*)" INFO_C_TAG_CLOSE_RE ")", 0, 0, NULL);
-  GMatchInfo *match_info;
-  g_regex_match (regex, content, 0, &match_info);
-  while (g_match_info_matches (match_info))
+  gint content_len = strlen (content);
+  GMatchInfo *image_match_info = string_get_image_match_info (content);
+  GMatchInfo *note_match_info = string_get_note_match_info (content);
+
+  while (g_match_info_matches (image_match_info)
+	 || g_match_info_matches (note_match_info))
     {
       gint image_start;
       gint image_end;
-      gboolean image_found = g_match_info_fetch_pos (match_info, 0,
+      gboolean image_found = g_match_info_fetch_pos (image_match_info, 0,
 						     &image_start, &image_end);
-      gchar *before = g_strndup (&content[pos], image_start - pos);
-      pos = image_end + 1;
+      gint note_start;
+      gint note_end;
+      gboolean note_found = g_match_info_fetch_pos (note_match_info, 0,
+						    &note_start, &note_end);
+      gint start;
+      gint end;
+      if (image_found
+	  && (!note_found
+	      || (image_start < note_start)))
+	{
+	  note_found = FALSE;
+	  start = image_start;
+	  end = image_end;
+	}
+      else // note_found
+	{
+	  image_found = FALSE;
+	  start = note_start;
+	  end = note_end;
+	}
+
+      gchar *before = g_strndup (&content[pos], MAX (start - pos, 0)); /* matches may touch or overlap */
+      pos = CLAMP (end + 1, pos, content_len); /* stay inside CONTENT and never move backwards */
       xmlNewTextChild (parent, NULL, BAD_CAST "para1", BAD_CAST (before));
       g_free (before);
       if (image_found)
-	info_insert_image (parent, match_info);
-      g_match_info_next (match_info, NULL);
+	{
+	  info_insert_image (parent, image_match_info);
+	  g_match_info_next (image_match_info, NULL);
+	}
+      else // (note_found)
+	{
+	  info_insert_note (parent, note_match_info);
+	  g_match_info_next (note_match_info, NULL);
+	}
     }
   gchar *after = g_strndup (&content[pos], content_len - pos);
   xmlNewTextChild (parent, NULL, BAD_CAST "para1", BAD_CAST (after));
   g_free (after);
+  g_match_info_free (image_match_info);
+  g_match_info_free (note_match_info);
   return 0;
 }
 
@@ -819,7 +884,6 @@ parse_tree_level (GtkTreeStore *tree, xmlNodePtr *node, GtkTreeIter iter)
 	char *page_no = NULL;
 	char *page_name = NULL;
 	char *page_content = NULL;
-	gboolean notes = FALSE;
 
 	debug_print (DB_DEBUG, "Decended\n");
 	do
@@ -830,23 +894,13 @@ parse_tree_level (GtkTreeStore *tree, xmlNodePtr *node, GtkTreeIter iter)
 				COLUMN_PAGE_CONTENT, &page_content,
 				-1);
 		debug_print (DB_DEBUG, "Got Section: %s\n", page_name);
-		if (strstr (page_content, "*Note") || 
-		    strstr (page_content, "*note")) {
-		  notes = TRUE;
-		}
 		if (strstr (page_content, "* Menu:")) {
-		  newnode = yelp_info_parse_menu (tree, node, page_content, notes);
+		  newnode = yelp_info_parse_menu (tree, node, page_content);
 		} else {
 		  newnode = xmlNewTextChild (*node, NULL,
 					     BAD_CAST "Section",
 					     NULL);
-		  if (!notes)
-		    info_body_text (newnode, NULL, "para", page_content);
-		  
-		  else {
-		    /* Handle notes here */
-		    info_process_text_notes (&newnode, page_content, tree);
-		  }
+		  info_body_text (newnode, "para1", page_content);
 		}
 		/* if we free the page content, now it's in the XML, we can
 		 * save some memory */
@@ -899,6 +953,9 @@ yelp_info_parser_parse_tree (GtkTreeStore *tree)
 	g_print ("XML follows:\n%s\n", xmlbuf);
 	*/
 
+	if (getenv ("YELP_INFO_DUMP_DOC"))
+	  xmlDocDump (stderr, doc);
+
 	return doc;
 }
 
@@ -981,7 +1038,7 @@ get_menuoptions (gchar *line, gchar **title, gchar **ref, gchar **desc,
 
 xmlNodePtr
 yelp_info_parse_menu (GtkTreeStore *tree, xmlNodePtr *node, 
-		      gchar *page_content, gboolean notes)
+		      gchar *page_content)
 {
   gchar **split;
   gchar **menuitems;
@@ -996,12 +1053,7 @@ yelp_info_parse_menu (GtkTreeStore *tree, xmlNodePtr *node,
     
 
   tmp = g_strconcat (split[0], "\n* Menu:", NULL);
-  if (!notes)
-    xmlNewTextChild (newnode, NULL,
-		     BAD_CAST "para", BAD_CAST tmp);
-  else {
-    info_process_text_notes (&newnode, tmp, tree);
-  }
+  info_body_text (newnode, "para1", tmp);
   g_free (tmp);
 
   menuitems = g_strsplit (split[1], "\n", -1);
@@ -1078,218 +1130,3 @@ yelp_info_parse_menu (GtkTreeStore *tree, xmlNodePtr *node,
   
   return newnode;
 }
-
-void
-info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree)
-{
-  gchar **notes;
-  gchar **current;
-  xmlNodePtr holder;
-  xmlNodePtr ref1;
-  gboolean first = TRUE;
-
-  notes = g_strsplit (content, "*Note", -1);
-  holder = xmlNewChild (*node, NULL, BAD_CAST "noteholder", NULL);
-
-  for (current = notes; *current != NULL; current++) {
-    /* Since the notes can be either *Note or *note, we handle the second 
-     * variety here
-     */
-    gchar **subnotes;
-    gchar **current_real;
-
-    subnotes = g_strsplit (*current, "*note", -1);
-    for (current_real = subnotes; *current_real != NULL; current_real++) {
-      gchar *url, **urls, **ulink;
-      gchar *append;
-      gchar *alt_append, *alt_append1;
-      gchar *link_text;
-      gchar *href = NULL;
-      gchar *break_point = NULL;
-      gboolean broken = FALSE;
-      if (first) {
-	/* The first node is special.  It doesn't have a note ref at the 
-	 * start, so we can just add it and forget about it.
-	 */
-	first = FALSE;
-	info_body_text (holder, NULL, "para1", (*current_real));
-	continue;
-      }
-      /* If we got to here, we now gotta parse the note reference */
-
-      if (*current_real[0] == '_') {
-	/* Special type of note that isn't really a note, but pretends
-	 * it is
-	 */
-	info_body_text (holder, NULL, "para1",
-			g_strconcat ("*Note", *current_real, NULL));
-	continue;
-      }
-      append = strchr (*current_real, ':');
-      if (!append) {
-	info_body_text (holder, NULL, "para1", *current_real);
-	continue;
-      }
-      append++;
-      alt_append = append;
-      alt_append1 = alt_append;
-      append = strchr (append, ':');
-      alt_append = strchr (alt_append, '.');
-      if (alt_append && g_str_has_prefix (alt_append, ".info")) {
-	broken = TRUE;
-	alt_append++;
-	alt_append = strchr (alt_append, '.');
-      }
-      alt_append1 = strchr (alt_append1, ',');
-      if (!append && !alt_append && !alt_append1) {
-	info_body_text (holder, NULL, "para1", *current_real);
-	continue;
-      }
-      if (!append || alt_append || alt_append1) {
-	if (!append) {
-	  if (alt_append) append = alt_append;
-	  else append = alt_append1;
-	}
-	if ((alt_append && alt_append < append))
-	  append = alt_append;
-	if (alt_append1 && alt_append1 < append)
-	  append = alt_append1;
-      }
-      append++;
-      url = g_strndup (*current_real, append - (*current_real));
-
-      /* By now, we got 2 things.  First, is append which is the (hopefully)
-       * non-link text.  Second, we got a url.
-       * The url can be in several forms:
-       * 1. linkend::
-       * 2. linkend:(infofile)Linkend.
-       * 3. Title: Linkend.
-       * 4. Title: Linkend, (pretty sure this is just broken)
-       * 5. Title: (infofile.info)Linkend.
-       * All possibilities should have been picked up.
-       * Here:
-       * Clean up the split.  Should be left with a real url and
-       * a list of fragments that should be linked
-       * Also goes through and removes extra spaces, leaving only one 
-       * space in place of many
-       */
-      urls = g_strsplit (url, "\n", -1);
-      break_point = strchr (url, '\n');
-      while (break_point) {
-	*break_point = ' ';
-	break_point = strchr (++break_point, '\n');
-      }
-      break_point = strchr (url, ' ');
-      while (break_point) {
-	if (*(break_point+1) == ' ') {
-	  /* Massive space.  Fix. */
-	  gchar *next = break_point;
-	  gchar *url_copy;
-	  while (*next == ' ')
-	    next++;
-	  next--;
-	  url_copy = g_strndup (url, break_point-url);
-	  g_free (url);
-	  url = g_strconcat (url_copy, next, NULL);
-	  break_point = strchr (url, ' ');
-	  g_free (url_copy);
-	} else {
-	  break_point++;
-	  break_point = strchr (break_point, ' ');
-	}
-      }
-      if (url[strlen(url)-1] == '.') { /* The 2nd or 3rd sort of link */ 
-	gchar *stop = NULL;
-	gchar *lurl = NULL;
-	gchar *zloc = NULL;
-	stop = strchr (url, ':');
-	lurl = strchr (stop, '(');
-	if (!lurl) { /* 3rd type of link */
-	  gchar *link;
-	  gint length;
-	  stop++;
-	  link = g_strdup (stop);
-	  link = g_strstrip (link);
-	  length = strlen (link) - 1;
-	  link[length] = '\0';	  
-	  href = g_strconcat ("#", link, NULL);
-	  link[length] = 'a';
-	  g_free (link);
-
-
-	} else { /* 2nd type of link.  Easy. Provided .info is neglected ;) */
-	  if (broken) {
-	    gchar *new_url;
-	    gchar *info;
-	    gchar *stripped;
-
-	    new_url = g_strdup (lurl);
-	    info = strstr (new_url, ".info)");
-	    stripped = g_strndup (new_url, info-new_url);
-	    info +=5;
-	    lurl = g_strconcat (stripped, info, NULL);
-	    g_free (stripped);
-	    g_free (new_url);
-	  }
-	  zloc = &(lurl[strlen(lurl)-1]);
-	  *zloc = '\0';
-	  href = g_strconcat ("info:", lurl, NULL);
-	  *zloc = 'a';
-	}
-      } else { /* First kind of link */
-	gchar *tmp1;
-	gchar *frag;
-
-	tmp1 = strchr (url, ':');
-	if (!tmp1)
-	  frag = g_strdup (url);
-	else 
-	  frag = g_strndup (url, tmp1 - url);
-	g_strstrip (frag);
-	gtk_tree_model_foreach (GTK_TREE_MODEL (tree), resolve_frag_id, &frag);
-	href = g_strconcat ("#", frag, NULL);
-	g_free (frag);
-      }
-      for (ulink = urls; *ulink != NULL; ulink++) {
-	if (ulink == urls)
-	  link_text = g_strconcat ("*Note", *ulink, NULL);
-	else {
-	  gchar *spacing = *ulink;
-	  gchar *tmp;
-	  gint count = 0;
-	  while (*spacing == ' ') {
-	    spacing++;
-	    count++;
-	  }
-	  if (spacing != *ulink) {
-	    if (count > 1)
-	      spacing-=2;
-	    tmp = g_strndup (*ulink, spacing-*ulink);
-	    if (count > 1)
-	      spacing+=2;
-	    xmlNewTextChild (holder, NULL, BAD_CAST "spacing",
-			     BAD_CAST tmp);
-	    g_free (tmp);
-	    link_text = g_strdup (spacing);
-	  } else {
-	    link_text = g_strdup (*ulink);
-	  }
-	}
-	ref1 = xmlNewTextChild (holder, NULL, BAD_CAST "a",
-				BAD_CAST link_text);
-	if (*(ulink+1) != NULL)
-	  info_body_text (holder, NULL, "para", "");
-
-	g_free (link_text);
-	xmlNewProp (ref1, BAD_CAST "href", BAD_CAST href);
-      }
-      g_strfreev (urls);
-      /* Finally, we can add the text as required */
-      info_body_text (holder, NULL, "para1", append);
-      g_free (url);
-      g_free (href);
-    }
-    g_strfreev (subnotes);
-  }
-  g_strfreev (notes);
-}
-- 
1.6.0.rc1.49.g98a8


