[Kazehakase-cvs] CVS update: kazehakase/module/search

Back to archive index

Hiroyuki Ikezoe ikezo****@users*****
Tue Dec 5 14:09:35 JST 2006


Index: kazehakase/module/search/kz-search-hyper-estraier.c
diff -u kazehakase/module/search/kz-search-hyper-estraier.c:1.8 kazehakase/module/search/kz-search-hyper-estraier.c:1.9
--- kazehakase/module/search/kz-search-hyper-estraier.c:1.8	Mon Dec  4 11:50:53 2006
+++ kazehakase/module/search/kz-search-hyper-estraier.c	Tue Dec  5 14:09:35 2006
@@ -21,6 +21,8 @@
 #include <ctype.h>
 #include <glib/gi18n.h>
 #include <estraier.h>
+#include <glib.h>
+#include <glib/gstdio.h>
 
 #include "kazehakase.h"
 #include "utils/utils.h"
@@ -33,7 +35,7 @@
 #define HYPER_ESTRAIERRAIER_URI "http://hyperestraier.sourceforge.net/"
 #define DTD   "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">"
 #define HEAD  "<head>\n" \
-	      "  <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">" \
+	      "  <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"/>" \
 	      "  <title>Full-text search in history</title>\n" \
 	      "  <link rel=\"stylesheet\" type=\"text/css\" href=\"history-search:?css=search-result.css\">\n" \
 	      "</head>\n"
@@ -219,7 +221,7 @@
 		int ecode, ret;
 		ret = est_db_close(priv->db, &ecode);
 		if (!ret)
-			g_warning("error: %s", est_err_msg(ecode));
+			g_warning("db close error: %s", est_err_msg(ecode));
 	}
 	if (priv->dbname)
 	{
@@ -299,7 +301,7 @@
 	g_string_append_printf(html, "<h1>Search results for %s</h1>",
 			       text);
 
-	dirname = g_strconcat(g_get_home_dir(), HISTORY_DIR, NULL);
+	dirname = g_build_filename(g_get_home_dir(), HISTORY_DIR, NULL);
 	len = strlen(dirname);
 	g_free(dirname);
 
@@ -349,9 +351,7 @@
 
 	ret = est_db_close(db, &ecode);
 	if (!ret)
-	{
-		g_warning("error: %s", est_err_msg(ecode));
-	}
+		g_warning("db close error: %s", est_err_msg(ecode));
 
 	g_string_append_printf(html, FOOTER, HYPER_ESTRAIERRAIER_URI, _EST_VERSION);
 	g_string_append(html, "</body></html>");
@@ -368,14 +368,54 @@
 	return create_search_result_html(search, text);
 }
 
+/* Return the charset named by the first <meta http-equiv="Content-Type">
+ * declaration in contents as a newly allocated string (free with g_free()),
+ * or NULL if contents is NULL or no charset is found.  Tag and attribute
+ * names are matched by case-sensitive substring search, not an HTML parse. */
+static gchar *
+get_document_encoding (const gchar *contents)
+{
+	const gchar *p = contents;
+
+	if (!contents) return NULL;
+
+	while ((p = strstr(p, "<meta ")))
+	{
+		const gchar *end;
+		size_t len;
+
+		p = strstr(p, "http-equiv=\"");
+		if (!p) break;
+
+		p += 12;	/* strlen("http-equiv=\"") */
+		end = strchr(p, '"');
+		if (!end) break;
+
+		/* the whole value must match; a bare prefix or "" must not */
+		if (end - p != 12 || g_ascii_strncasecmp(p, "content-type", 12))
+			continue;
+
+		p = strstr(end, "charset=");
+		if (!p) break;
+		p += 8;		/* strlen("charset=") */
+
+		/* stop at the nearest delimiter, not at a ';' far downstream */
+		len = strcspn(p, ";\"' \t\r\n>");
+		if (len == 0) break;
+		return g_strndup(p, len);
+	}
+
+	return NULL;
+}
+
 gboolean
 register_document (KzSearch *search, const gchar *uri, const gchar *encoding, const gchar *title, const gchar *contents, GTime mtime)
 {
 	ESTDB *db;
 	ESTDOC *doc;
 	int ret, ecode, success;
-	gchar *time_str;
-	gchar *filename;
+	gchar *time_str, *filename, *text = NULL;
+	gchar *utf8 = NULL;
 	KzSearchHyperEstraierPrivate *priv = KZ_SEARCH_HYPER_ESTRAIER_GET_PRIVATE(search);
 
 	db = est_db_open(priv->dbname, ESTDBWRITER | ESTDBCREAT, &ecode);
@@ -391,34 +431,46 @@
 	est_doc_add_attr(doc, ESTDATTRURI, filename);
 	g_free(filename);
 
-	est_doc_add_attr(doc, ESTDATTRTITLE, title);
+	if (title)
+		est_doc_add_attr(doc, ESTDATTRTITLE, title);
 
 	time_str = cbdatestrwww(mtime, 0);
 	est_doc_add_attr(doc, ESTDATTRMDATE, time_str);
 	g_free(time_str);
 
-	if (contents)
+	if (!encoding)
+		encoding = get_document_encoding(contents);
+	
+	if (encoding && strcmp(encoding, "UTF-8"))
+	{
+		utf8 = g_convert(contents, -1,
+				 "UTF-8", encoding,
+				 NULL, NULL,
+				 NULL);
+	}
+	if (utf8)
+	{
+		text = html_to_text(utf8);
+		g_free(utf8);
+	}
+	else
+		text = html_to_text(contents);
+
+	if (text)
 	{
-#warning FIXME! remove javascript code.
-		gchar *raw_text;
-		raw_text = remove_tag(contents, strlen(contents));
-		est_doc_add_text(doc, raw_text);
-		g_free(raw_text);
+		est_doc_add_text(doc, text);
+		g_free(text);
 	}
 
 	success = est_db_put_doc(db, doc, ESTPDCLEAN);
 	if (!success)
-	{
 		g_warning("register error: %s", est_err_msg(ecode));
-	}
 
 	est_doc_delete(doc);
 
 	ret = est_db_close(db, &ecode);
 	if (!ret)
-	{
 		g_warning("db close error: %s", est_err_msg(ecode));
-	}
 
 	return (success != 0) ? TRUE : FALSE;
 }
@@ -444,9 +496,7 @@
 
 	ret = est_db_close(db, &ecode);
 	if (!ret)
-	{
 		g_warning("error: %s", est_err_msg(ecode));
-	}
 
 	return (success != 0) ? TRUE : FALSE;
 }
@@ -483,9 +533,7 @@
 
 	ret = est_db_close(db, &ecode);
 	if (!ret)
-	{
 		g_warning("error: %s", est_err_msg(ecode));
-	}
 #warning FIXME! return GSource or something.
 	return 0;
 }
@@ -498,39 +546,52 @@
 }
 
 static void
+register_documents_in_path (KzSearch *search, const gchar *path)
+{
+	/* Recursively register every file below path with search.  Entries
+	 * that cannot be stat'ed or read are skipped.  NOTE(review): the
+	 * IS_DIR test follows symlinks, so a link cycle would recurse forever. */
+	GDir *gd = g_dir_open(path, 0, NULL);
+	const gchar *file;
+
+	if (!gd)
+		return;
+	while ((file = g_dir_read_name (gd)))
+	{
+		gchar *new_path = g_build_filename (path, file, NULL);
+		gchar *contents = NULL;
+		struct stat st;
+
+		if (g_file_test(new_path, G_FILE_TEST_IS_DIR))
+		{
+			register_documents_in_path(search, new_path);
+		}
+		/* st is uninitialized if g_stat fails and contents is NULL if
+		 * the read fails, so index only when both calls succeeded */
+		else if (g_stat(new_path, &st) == 0 &&
+			 g_file_get_contents(new_path, &contents, NULL, NULL))
+		{
+			gchar *uri = g_strdup_printf("file://%s", new_path);
+			GTime mtime = st.st_mtime;
+
+			kz_search_register_document(search, uri, NULL, NULL, contents, mtime);
+			g_free(uri);
+			g_free(contents);
+		}
+		g_free(new_path);
+	}
+	g_dir_close (gd);
+}
+
+static void
 make_index (KzSearch *search)
 {
-	const gchar *estgather = "estcmd gather -sd "; 
-	gchar *command;
-	gint argc;
-	gchar **argv = NULL;
-	GSpawnFlags flags;
-	GPid pid;
-	
-	command = g_strconcat(estgather, 
-			      g_get_home_dir(),
-			      HISTORY_INDEX" ",
-			      g_get_home_dir(),
-			      HISTORY_DIR,
-			      NULL);
-
-	g_shell_parse_argv(command,
-			   &argc,
-			   &argv,
-			   NULL);
-	flags = G_SPAWN_SEARCH_PATH |
-		G_SPAWN_STDOUT_TO_DEV_NULL;
-	
-	g_spawn_async(NULL,
-		      argv,
-		      NULL,
-		      flags,
-		      NULL,
-		      NULL,
-		      &pid,
-		      NULL);
-	g_strfreev(argv);
-	g_free(command);
+	/* Build the history directory path and register what lies beneath it. */
+	gchar *history_dir = g_build_filename(g_get_home_dir(), HISTORY_DIR, NULL);
+
+	register_documents_in_path(search, history_dir);
+
+	g_free(history_dir);
 }
 
 static gboolean


More information about the Kazehakase-cvs mailing list
Back to archive index