Following on from my previous post on bringing data from Twitter into SQL Server, you’ll recall that we were able to take data from a JSON data feed, using cURL, and parse it into SQL Server, using a custom Function.

This enabled the twitter feed to be loaded into a SQL database at the staggering performance of 38 records a second. In a future post, I hope to do some optimisation on the T-SQL process for getting this data in.

However, I wanted to see how quickly the data could be loaded using a C# application (since I’m a bit of a closet coder).

So, following on from Step 1 and Step 2 from the previous post, so you’ve taken the feeds from Twitter, and loaded the data into a Load table in SQL, I then want to get the data into a table in SQL Server.

There are several JSON libraries that are available, that link to C# (a list is here: http://json.org/). I chose JSON.NET (http://json.codeplex.com/) on the basis that it was quite frequently mentioned on StackOverflow (so if I had any questions, I’d stand a reasonable chance of getting an answer), and the examples looked pretty straightforward.

Having fired up VS 2012, I created a project, and added JSON.Net into it using NuGet (I like NuGet!), using Install-Package Newtonsoft.Json.

using System;
using System.Collections.Generic;
using System.Linq;
using Newtonsoft.Json;
using System.Data.SqlClient;

namespace JSONParse
{

class Program
{
static void Main(string[] args)
{
DateTime dStart, dEnd;
dStart = DateTime.Now;

// Connect to SQL
SqlConnection conn = new SqlConnection(“server=localhost; Trusted_Connection=yes; database=twitter_stream;”);
try
{
conn.Open();
}
catch (Exception e)
{
Console.WriteLine(e.ToString());
}

try
{

// Set up the Reader process
SqlDataReader reader = null;
SqlCommand myCommand = new SqlCommand(
“select * from tweetJSON”, conn);
reader = myCommand.ExecuteReader();

while (reader.Read())
{
try
{
// Convert the JSON Data
dynamic obj = JsonConvert.DeserializeObject(reader[1].ToString());
string tJSID = reader[0].ToString();
Console.WriteLine(“id_str {0}”, obj.id_str);
SqlCommand insertcmd = new SqlCommand()
{
CommandText = @”insert into TweetJSONStaging ( tJSID, Country, id_str,
followers_count,profile_image_url,statuses_count,
profile_background_image_url,created_at,friends_count,
location,name,lang, screen_name, source, geo_loc, text)
VALUES (@tJSID, @country,@id_str,@followers, @profileURL, @statuses,
@backgroundimageurl, @created_at, @friendscount,@location, @name,
@lang, @screenname,@source, @geoloc, @text)”
};
insertcmd.Connection = new SqlConnection(“server=localhost; Trusted_Connection=yes; database=twitter_stream;”);
insertcmd.Parameters.AddWithValue(“@tJSID”, tJSID);
insertcmd.Parameters.AddWithValue(“@id_str”, (string)obj.id_str);
insertcmd.Parameters.AddWithValue(“@followers”, (string)obj.user.followers_count);
insertcmd.Parameters.AddWithValue(“@backgroundimageurl”, (string)obj.user.profile_background_image_url);
insertcmd.Parameters.AddWithValue(“@name”, (string)obj.user.name);
insertcmd.Parameters.AddWithValue(“@profileURL”, (string)obj.user.profile_image_url);
insertcmd.Parameters.AddWithValue(“@statuses”, (string)obj.user.statuses_count);
insertcmd.Parameters.AddWithValue(“@friendscount”, (string)obj.user.friends_count);
insertcmd.Parameters.AddWithValue(“@screenname”, (string)obj.user.screen_name);
insertcmd.Parameters.AddWithValue(“@lang”, (string)obj.user.lang);
insertcmd.Parameters.AddWithValue(“@text”, (string)obj.text);
insertcmd.Parameters.AddWithValue(“@source”, (string)obj.source);
insertcmd.Parameters.AddWithValue(“@created_at”, (string)obj.created_at);
string sCountry = “”, sLocation = “”, sGeoloc = “”;

try
{
sCountry = (string)obj.place.country;
}
catch (Exception e)
{
//Console.WriteLine(e.ToString());
}
insertcmd.Parameters.AddWithValue(“@country”, sCountry);

try
{
sLocation = (string)obj.user.location;
}
catch (Exception e)
{
//Console.WriteLine(e.ToString());
}
insertcmd.Parameters.AddWithValue(“@location”, sLocation);
try
{
sGeoloc = String.Join(“,”, obj.place.bounding_box.coordinates.Last.First);
}
catch (Exception e)
{
// Console.WriteLine(e.ToString());
}
insertcmd.Parameters.AddWithValue(“@geoloc”, sGeoloc);

insertcmd.Connection.Open();
try
{
insertcmd.ExecuteNonQuery();
}
catch (Exception)
{ }
insertcmd.Connection.Close();
}
catch (Exception)
{}
}
}
catch (Exception e)
{
Console.WriteLine(e.ToString());
}

dEnd = DateTime.Now;
Console.WriteLine(“Time taken = “+ dEnd.Subtract(dStart));

Console.ReadKey();

}
}
}

Obviously, this code is an example, and you’d be wanting to have proper error handling, better functional layout, better commenting, etc, etc, etc in there, however, you can see how the code runs, and from a performance perspective, it’s better.

Running this code through the Debug mode in Visual Studio, I managed to get 155 records processed a second, pretty snappy.

Then, running it as an Application, outside Visual Studio I got 393 records a second! Just over 10 times faster than T-SQL with a Cursor.

image

Advertisements