JAXB2: Unmarshal to unknown type

July 15th, 2010 — 7:31pm

Say that you have a couple of beans that you want JAXB to unmarshal some XML files to, but you don’t know which of those beans that the XML files should be mapped to without looking at the root element of the XML files.

This means that you can’t use the JAXBContext.newInstance method that takes the target class as the first argument. What you can do is specify the package name instead, and have an ObjectFactory class in that package. You should be able to generate it somehow, but I couldn’t find any information about it; it’s simple and short, though, so you can write it yourself without breaking a sweat.

One of the beans that XML files should be able to be unmarshalled to.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
package org.eldslott.jaxb.messages;
 
import java.util.ArrayList;
import java.util.List;
 
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlElementWrapper;
import javax.xml.bind.annotation.XmlRootElement;
import org.eldslott.example.jaxb.messages.MessagePartResponse;
 
@XmlRootElement(name = "MessageResponse")
@XmlAccessorType(XmlAccessType.FIELD)
public class MessageResponse extends BaseMessage {
    private static final long serialVersionUID = 2114704894616054830L;
 
    @XmlElementWrapper(name = "partResponses")
    @XmlElement(name = "partResponse")
    private List<MessagePartResponse> partResponses = new ArrayList<MessagePartResponse>();
 
    @XmlElement
    private int code = ResponseCode.INVALID;
 
    @XmlElement
    private String message;
 
    @XmlElement
    @XmlJavaTypeAdapter(XmlDateAdapter.class)
    private Date timestamp;
 
    // getters and setters
}

And the abstract BaseMessage class.

1
2
3
4
5
6
7
8
9
10
11
12
13
package org.eldslott.jaxb.messages;
 
import java.io.Serializable;
 
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlRootElement;
 
/**
 * Common supertype of the message beans in this package.
 *
 * <p>It carries no fields of its own; it only gives the beans a shared,
 * serializable base type that callers can use as a parameter or return type.
 */
@XmlAccessorType(XmlAccessType.FIELD)
@XmlRootElement
public abstract class BaseMessage implements Serializable {
    private static final long serialVersionUID = 5381484324859864787L;
}

To marshal Dates in this format, you need to write your own XmlAdapter and annotate your Date fields with @XmlJavaTypeAdapter (you need to do this for Maps too, but it’s a bit longer, so I’ll skip it). This is one way to do it for Dates.

1
2
3
4
5
6
7
8
9
10
11
12
13
public static class XmlDateAdapter extends XmlAdapter<String, Date> {
    private final DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ");
 
    @Override
    public Date unmarshal(String date) throws Exception {
        return dateFormat.parse(date);
    }
 
    @Override
    public String marshal(Date date) throws Exception {
        return dateFormat.format(date);
    }
}

This is the XML file that the MessageResponse should be marshalled to.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
<?xml version="1.0" encoding="UTF-8"?>
<!-- Sample MessageResponse document: overall status plus one partResponse per message part. -->
<MessageResponse>
    <code>100</code>
    <message>partly successful</message>
    <timestamp>2010-01-18T22:34:00+0100</timestamp>
    <partResponses>
        <partResponse>
            <code>1</code>
            <correlationId>13</correlationId>
            <message>Success</message>
        </partResponse>
        <partResponse>
            <code>4</code>
            <correlationId>63</correlationId>
            <message>Failure</message>
        </partResponse>
    </partResponses>
</MessageResponse>

Then the marshalling and unmarshalling code could look something like this.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
package org.eldslott.jaxb;
 
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
 
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Marshaller;
import javax.xml.bind.Unmarshaller;
 
import org.eldslott.jaxb.messages.BaseMessage;
 
public class XmlObjectSerializationStrategy {
    public String serialize(BaseMessage baseMessage) throws ObjectSerializationException {
        ByteArrayOutputStream byteStream = new ByteArrayOutputStream();
 
        JAXBContext jaxbContext;
        Marshaller marshaller;
 
        try {
            jaxbContext = JAXBContext.newInstance(baseMessage.getClass());
            marshaller = jaxbContext.createMarshaller();
            marshaller.marshal(baseMessage, byteStream);
        } catch (JAXBException e) {
            throw new ObjectSerializationException(
                    "either could not create JAXB instance or initalize serializer", e);
        } 
 
        try {
            return byteStream.toString(encoding);
        } catch(UnsupportedEncodingException e) {
            throw new ObjectSerializationException(
                    "could convert byte array stream to string with encoding: " + encoding, e);
        }
    }
 
    public BaseMessage deserialize(String object) throws ObjectSerializationException {
        ByteArrayInputStream byteStream = new ByteArrayInputStream(object.getBytes());
 
        JAXBContext jaxbContext;
        Unmarshaller unmarshaller;
 
        try {
            ClassLoader loader = org.eldslott.jaxb.messages.ObjectFactory.class.getClassLoader();
            jaxbContext = JAXBContext.newInstance("org.eldslott.jaxb.messages", loader);
            unmarshaller = jaxbContext.createUnmarshaller();
        } catch (JAXBException e) {
            throw new ObjectSerializationException(
                    "either could not create JAXB instance or initalize unmarshaller", e);
        }
 
        try {
            return (BaseMessage) unmarshaller.unmarshal(byteStream);
        } catch (JAXBException e) {
            throw new ObjectSerializationException("could not unmarshal to BaseMessage", e);
        }
    }
}

And the ObjectFactory class, which JAXB needs to figure out which class it should unmarshal to.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
package org.eldslott.jaxb.messages;
 
import javax.xml.bind.annotation.XmlRegistry;
 
@XmlRegistry
public class ObjectFactory {
    public MessageResponse createMessageResponse() {
        return new MessageResponse();
    }
 
    public MessageRequest createMessageRequest() {
        return new MessageRequest();
    }
 
    // and so on
}

Now you can marshal and unmarshal neat and easily.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
package org.eldslott.jaxb.messages;
 
import java.util.Date;
 
import junit.framework.TestCase;
 
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
 
import org.eldslott.jaxb.messages.BaseMessage;
import org.eldslott.jaxb.messages.MessageResponse;
 
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(locations = {"classpath:applicationContext-serializationStrategy.xml"})
public class TestXmlObjectSerializationStrategy extends TestCase {
    private XmlObjectSerializationStrategy strategy;
    @Test
    public void testMarshalling() throws ObjectSerializationException {
        MessageResponse messageResponse = new MessageResponse();
        messageResponse.setCode(1);
        messageResponse.setMessage("some text");
        messageResponse.setDate(new Date());
 
        String marshalled = strategy.serialize(messageRequest);
 
        @SuppressWarnings("unused")
        MessageRequest messageRequest2 = (MessageRequest)strategy.deserialize(marshalled);
 
        assertEquals(messageResponse.getMessage(), messageResponse2.getMessae());
    }
}

Comment » | code

Shit-simple traffic routing

June 17th, 2010 — 6:08pm

Say you’re at work, and you want to surf on slashdot, or watch fitness videos on youtube. You might like it, but your boss probably doesn’t, and it’s quite possible that there’s a part of the IT department that monitors your internet activity.

One solution is to route your packets through a tunnel to a proxy outside the office network, but this all sounds quite mind-blowingly horrifically over-complicated, so fuck no.

That’s what I thought before I tried it. Five lines of code later and it was solved (much thanks to the “newer” version of squid (>= 2.6)).

Lets call your computer at work computerB, and the computer outside the office network—e.g. at your home—computerA.

1. Setup squid on computerA

Download and install squid, either through your package manager or from source. Clear the configuration file and add these lines to make squid a transparent proxy listening on port 1234:

http_port 1234 transparent
acl all src 0.0.0.0/0.0.0.0
http_access allow all

Start squid.

2. Open a SSH tunnel to computerA

From computerB, create a ssh tunnel to computerA on port 1234.

ssh computerA -L 1234:localhost:1234

Or create a reverse tunnel from computerA to computerB.

ssh computerB -R 1234:localhost:1234

3. Route packets on port 80 to localhost

Add the following rule to iptables, which will redirect all packets sent from your computer to destination port 80 to localhost on port 1234.

iptables -t nat -A OUTPUT -p tcp --dport 80 -j REDIRECT --to-port 1234

Conclusion

Now when you visit a web site on port 80 on computerB, iptables will redirect the packet to 127.0.0.1:1234, where your SSH tunnel listens. The tunnel sends it to computerA on port 1234, where your transparent proxy created by squid listens. Squid fetches whatever URL you wanted to visit, and sends the reply from the server back through your SSH tunnel and back to the browser.

Keep in mind that this only works when you’re visiting sites on port 80, if you want to route traffic on e.g. port 443, you’d have to modify the iptables rule.

1 comment » | Uncategorized

Calculating work time

June 1st, 2010 — 10:04pm

At work we report each week how many hours we’ve worked each day, so we (at least I have to) write down each day at what time I start, at what time I take my lunch break and for how long, and when I quit, so I know how many hours I’ve worked each day (I usually take an odd number of minutes to eat lunch, and come and go at not-so-even hours).

I was having a hard time during the late evenings when I was about to go home from work counting how many minutes I’d worked that day, so I figured I should write a script to do it for me.

Each day I write down the date and then at what minute intervals I’ve worked. E.g.:

2010-01-19: 0913-1128, 1157-1819
2010-01-19: 0945-1218, 1140-2150

What I came up with was this:

1
2
3
4
5
6
7
8
9
10
11
12
import sys


def s(m):
    """Return m with the separator characters '-', ':' and ' ' removed."""
    return ''.join(x for x in m if x not in '-: ')


def f(m):
    """Return the length in minutes of a stripped interval 'HHMMHHMM'.

    Only the first eight characters are read, so a trailing ',' is ignored.
    """
    return (i(m, 4) - i(m, 0)) * 60 + i(m, 6) - i(m, 2)


def i(m, d):
    """Return the two-digit number starting at index d of m as an int."""
    return int(m[d:d + 2])


def o(m):
    """Print m minutes as '<hours>h <minutes>m'."""
    # print() with a single argument and '//' behave the same on Python 2 and 3;
    # the old print statement is a syntax error on Python 3 and '/' would
    # produce a float there.
    print("%sh %sm" % (m // 60, m % 60))


if __name__ == "__main__":
    # Sum every interval given on the command line and print the total.
    o(sum(f(s(x)) for x in sys.argv[1:]))

Example usage (‘,‘, ‘:‘ and ‘ ‘ are ignored):

[email protected] ~ $ work 0913-1128, 1157-1819
8h 37m
[email protected] ~ $ work 0945-1218, 1140-2150
12h 43m
[email protected] ~ $ work 0945-1218, 1140-2150 0913-1128, 1157-1819
21h 20m

Comment » | code

XML Marshaling with Hibernate and Spring

June 1st, 2010 — 9:40pm

According to wikipedia, “[...] marshalling (similar to serialization) is the process of transforming the memory representation of an object to a data format suitable for storage or transmission. It is typically used when data must be moved between different parts of a computer program or from one program to another.”

Let’s say we have to marshal XML files to Java objects in a clean and easy way; Spring can make it happen for you. But as soon as you want to do a bit more complex marshaling, it strays from being easy to being a pain in the ass, so I thought I should show how I did it after a lot of curses seeking documentation.

The example I’m going to show is a way to take a fairly easy XML and marshal it, with the flexibility to extend to pretty big needs. The XML below is quite short, and only has some necessary information. It is thought of as a booking that’s just been made for a movie or theater, and needs to be handled by the system for later processing.

Sent either as HTTP PUT or on a JMS queue

In this example the XML can be sent directly over HTTP as a RESTful HTTP PUT, or posted on a JMS queue (such as Apache ActiveMQ or similar). Spring will do the marshaling, and Hibernate the validation.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
<!-- Sample booking: the show, the location, the date, who booked, and the reserved seats. -->
<booking>
    <show>
        <code>738</code>
        <description>The Making of a Spy</description>
    </show>
    <location>
        <code>83</code>
        <description>South Theater</description>
    </location>
    <date>2011-02-19T21:30:00+0100</date>
    <name>Cate Archer</name>
    <email>[email protected]</email>
    <seats>
        <seat>
            <row>4</row>
            <chair>12</chair>
        </seat>
        <seat>
            <row>4</row>
            <chair>13</chair>
        </seat>
    </seats>
</booking>

BookingController.java

The BookingController class will get the XML file both when using a JMS queue and when using REST. MessageListener is a JMS interface, which forces you to implement the method onMessage, which will be called each time a message is sent on the queue the application is configured to listen on. onMessage will then send it as a mocked HTTP PUT to the addBooking method. Here Spring works its real magic. The @RequestBody annotation indicates that the message to be marshaled (the XML file) will be converted to the type of object specified as the argument type (in this case, Booking). The @Valid annotation indicates that the Hibernate constraint annotations used in the said type should be enforced.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
package org.eldslott.booking;
 
import java.util.Date;
import java.util.Set;
 
import javax.jms.JMSException;
import javax.jms.Message;
import javax.jms.MessageListener;
import javax.jms.TextMessage;
import javax.validation.ConstraintViolation;
import javax.validation.Valid;
import javax.validation.Validation;
import javax.validation.ValidationException;
import javax.validation.Validator;
import javax.validation.constraints.NotNull;
 
import org.hibernate.validator.constraints.NotEmpty;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.mock.web.MockHttpServletRequest;
import org.springframework.mock.web.MockHttpServletResponse;
import org.springframework.stereotype.Controller;
import org.springframework.ui.ModelMap;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.servlet.HandlerAdapter;
 
@Controller
@RequestMapping(value = "/")
public class BookingController implements MessageListener {
    private static final Validator validator;
 
    @Autowired private MockHttpServletRequest request; // Used to mock the HTTP PUT if a JMS queue is used.
    @Autowired private HandlerAdapter adapter; // Will be set when using <mvc:annotation-driven /> in the application context.
 
    static {
        // Use default javax.validation validator.
        validator = Validation.buildDefaultValidatorFactory().getValidator();
    }
 
    @RequestMapping(method = RequestMethod.PUT)
    public ModelMap addBooking(@RequestBody @Valid Booking booking) throws ValidationException {
        Set<ConstraintViolation<Reservation>> violations = validator.validate(booking);
 
        // Loop through all violations (if any) of the validation constraints we've put on the Booking object.
        for (ConstraintViolation<Reservation> violation : violations) {
            Object annotation = violation.getConstraintDescriptor().getAnnotation();
            String field = violation.getPropertyPath().toString();
 
            String reason = "is invalid";
            if (annotation instanceof NotNull || annotation instanceof NotEmpty) {
                reason = "must be set";
            }
 
            String message = "\n\t'%s' %s in %s\n\tinvalid value: '%s'\n\tconstraints are: '%s'";
            String exception = String.format(message,
                    field,
                    reason,
                    violation.getLeafBean().getClass().getSimpleName(),
                    violation.getInvalidValue(),
                    violation.getConstraintDescriptor().getAnnotation()
            );
 
            // Hibernate will not throw any exceptions by itself if the constraints are violated.
            throw new ValidationException(exception);
        }
 
        // Do something with the booking object.
 
        return null;
    }
 
    @Override
    public void onMessage(Message message) {
        request = new MockHttpServletRequest();
        request.setRequestURI("/");
        request.setMethod("POST");
        request.addHeader("Content-Type", "text/xml;charset=UTF-8");
 
        if (!(message instanceof TextMessage)) {
            throw new IllegalArgumentException("Message must be of type TextMessage");
        }
 
        String content;
 
        try {
            content = ((TextMessage)message).getText();
        }
        catch (JMSException e) {
            System.out.println("Could not get content of JMS message: " + e.getMessage());
            throw new RuntimeException(e);
        }
 
        try {
            request.setContent(content.getBytes());
            adapter.handle(request, new MockHttpServletResponse(), this);
        }
        catch (Exception e) {
            System.out.println("Could not invoke controller: " + e.getMessage());
            throw new RuntimeException(e);
        }
    }
}

Booking.java

This is the class that Spring should try to marshal the XML file to. It is pretty straight forward. Apply all the constraints you want to each field, and if the field is not a native java type, you should put the @Valid annotation on it, and it will too be validated. A custom date adapter is used here to parse the dates with a specific format.

If multiple nodes of the same type in the XML file are child nodes of a certain node, you probably want to use the @XmlElementWrapper annotation, e.g.:

1
2
3
4
<nodeA>
    <nodeB>C</nodeB>
    <nodeB>C</nodeB>
</nodeA>

If you don’t, you’d need to have a private NodeA nodeA, whose only content would be a private List nodeBs. Now you can instead skip the NodeA reference and use the list of NodeBs directly. Look at the example XML for further clarification.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
@XmlRootElement(name = "booking")
@XmlAccessorType(XmlAccessType.FIELD)
public class Booking {
    private static class DateAdapter extends XmlAdapter<String, Date> {
        private final DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ");
 
        public Date unmarshal(String date) throws Exception {
            return dateFormat.parse(date);
        }
 
        public String marshal(Date date) throws Exception {
            return dateFormat.format(date);
        }
    }
 
    @XmlElement(name = "seat")
    @XmlElementWrapper(name = "seats")
    @Valid
    @NotNull
    private List<Seat> seats;
 
    @Future
    @NotNull
    @XmlJavaTypeAdapter(DateAdapter.class)
    @XmlElement(name = "date")
    private Date date;
 
    @XmlElement(name = "name")
    @NotEmpty
    private String name;
 
    @XmlElement(name = "email")
    @Email
    @NotEmpty
    private String email;
 
    @XmlElement(name = "show")
    @NotNull
    @Valid
    private Show show;
 
    @XmlElement(name = "location")
    @NotNull
    @Valid
    private Location location;
 
    // getters and setters
}

Seat.java

The Seat class is straight forward. Only the row and chair number.

1
2
3
4
5
6
7
8
9
10
11
12
13
@XmlRootElement(name = "seat")
@XmlAccessorType(XmlAccessType.FIELD)
public class Seat {
    @XmlElement(name = "row")
    @NotNull
    private Integer row;
 
    @XmlElement(name = "chair")
    @NotNull
    private Integer chair;
 
    // getters and setters
}

Show.java

Another nifty feature is that similar nodes can extend an abstract representation, so you write less code. The Show class looks like the others, except that it doesn’t have any fields of its own; they are all represented in the AbstractNode class.

1
2
3
4
5
/**
 * The {@code <show>} node of a booking. All of its fields are inherited from
 * {@link AbstractNode}.
 */
@XmlAccessorType(XmlAccessType.FIELD)
@XmlRootElement(name = "show")
public class Show extends AbstractNode {
    // no more code needed
}

Location.java

And the same with the Location class.

1
2
3
4
5
/**
 * The {@code <location>} node of a booking. All of its fields are inherited
 * from {@link AbstractNode}.
 */
// The root element name must be "location"; "show" already belongs to the Show class.
@XmlRootElement(name = "location")
@XmlAccessorType(XmlAccessType.FIELD)
public class Location extends AbstractNode {
    // no more code needed
}

AbstractNode.java

Here is where the difference lies. You still have to apply the @XmlRootElement annotation, but you may not supply it with a name, because that is already done in the classes extending this one. Except for that little, but oh so damn important thing, it looks like all the other node classes.

1
2
3
4
5
6
7
8
9
10
11
12
13
@XmlRootElement
@XmlAccessorType(XmlAccessType.FIELD)
public class AbstractNode {
    @XmlElement(name = "code")
    @NotNull
    private Integer code;
 
    @XmlElement(name = "description")
    @NotEmpty
    private String description;
 
    // getters and setters
}

applicationContext.xml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xmlns:context="http://www.springframework.org/schema/context"
        xmlns:amq="http://activemq.apache.org/schema/core"
        xmlns:mvc="http://www.springframework.org/schema/mvc"
        xsi:schemaLocation="
            http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-3.0.xsd
            http://www.springframework.org/schema/context http://www.springframework.org/schema/context/spring-context-3.0.xsd
            http://www.springframework.org/schema/mvc http://www.springframework.org/schema/mvc/spring-mvc-3.0.xsd
            http://activemq.apache.org/schema/core http://activemq.apache.org/schema/core/activemq-core.xsd">
 
    <!-- Turns on annotation processing (such as @Autowired) for the beans declared in this file. -->
    <context:annotation-config />
 
    <!--
        Configures the @Controller programming model and registers the HandlerMapping and HandlerAdapter
        required to dispatch requests to your @Controllers. In addition, it applies sensible defaults
        based on what is present in the classpath, such as @Valid, @NumberFormat, @DateTimeFormat etc.
     -->
    <mvc:annotation-driven />
    <!-- Mock request bean; BookingController declares an @Autowired MockHttpServletRequest field. -->
    <bean id="request" class="org.springframework.mock.web.MockHttpServletRequest" />
 
    <!-- The JMS queue that bookings are posted on. -->
    <amq:queue id="receptionQueue" physicalName="org.eldslott.booking.reception"/>
 
    <!-- ActiveMQ connection factory pointing at a broker on localhost, port 61616. -->
    <amq:connectionFactory id="queueConnectionFactory" brokerURL="tcp://localhost:61616"/>
 
    <!-- Message listener container for the reception queue -->
    <bean class="org.springframework.jms.listener.DefaultMessageListenerContainer">
        <!-- Number of concurrent consumers reading from the queue. -->
        <property name="concurrentConsumers" value="5"/>
        <property name="connectionFactory" ref="queueConnectionFactory"/>
        <property name="destination" ref="receptionQueue"/>
        <!-- BookingController implements MessageListener, so it is plugged in as the listener itself. -->
        <property name="messageListener">
            <bean class="org.eldslott.booking.BookingController" />
        </property>
    </bean>
</beans>

pom.xml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
<project
        xmlns="http://maven.apache.org/POM/4.0.0"
        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xsi:schemaLocation="
            http://maven.apache.org/POM/4.0.0
            http://maven.apache.org/maven-v4_0_0.xsd">

    <modelVersion>4.0.0</modelVersion>
    <groupId>org.eldslott</groupId>
    <artifactId>eldslott_marshaller_jar</artifactId>
    <packaging>jar</packaging>

    <name>eldslott marshaller jar</name>

    <description>
        eldslott marshaller jar
    </description>

    <version>0.1-SNAPSHOT</version><!-- SNAPSHOT -->

    <dependencies>
        <!-- Spring object/XML mapping support. -->
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-oxm</artifactId>
            <version>3.0.0.RELEASE</version>
            <exclusions>
                <exclusion>
                    <groupId>commons-logging</groupId>
                    <artifactId>commons-logging</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <!-- Provides MockHttpServletRequest/MockHttpServletResponse used by BookingController. -->
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-test</artifactId>
            <version>3.0.0.RELEASE</version>
            <exclusions>
                <exclusion>
                    <groupId>commons-logging</groupId>
                    <artifactId>commons-logging</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-web</artifactId>
            <version>3.0.0.RELEASE</version>
        </dependency>
        <!-- Needed for org.springframework.web.servlet.HandlerAdapter and the mvc: namespace. -->
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-webmvc</artifactId>
            <version>3.0.0.RELEASE</version>
        </dependency>
        <!-- Needed for DefaultMessageListenerContainer in the application context. -->
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-jms</artifactId>
            <version>3.0.0.RELEASE</version>
        </dependency>
        <dependency>
            <groupId>org.hibernate</groupId>
            <artifactId>hibernate-validator</artifactId>
            <version>4.0.2.GA</version>
        </dependency>
        <!--
            NOTE(review): the three dependencies below declare no <version>. They only
            resolve if a parent POM or a <dependencyManagement> section (not part of
            this snippet) supplies one; otherwise add explicit versions here.
        -->
        <dependency>
            <groupId>javax.persistence</groupId>
            <artifactId>persistence-api</artifactId>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.activemq</groupId>
            <artifactId>activemq-core</artifactId>
        </dependency>
        <dependency>
            <groupId>org.apache.xbean</groupId>
            <artifactId>xbean-spring</artifactId>
        </dependency>
    </dependencies>
</project>

Comment » | code

Fun with TCL

January 14th, 2010 — 7:10pm

I haven’t written much in TCL; only one script. Its sole purpose is to find http and https links in channels, download them with curl, extract the title element and print it to the channel where the link was posted. I posted about this script a while back.

Today a Google link got posted in a channel and the title “403: Forbidden” was fetched. This is because Google doesn’t allow Curl to fetch their web pages, so I had to change the user agent of Curl. This is how it turned out:

catch {exec $binary(CURL) --insecure -A "Mozilla/4.73 [en] (X11; U; Linux 2.2.15 i686)" "$txt"} html

txt is the posted URL, and html is where the fetched HTML will be put in. But when I added this user agent (the -A option for Curl), no title was posted, and I got an error telling me invalid command "en".

One of the few things I know about TCL is that everything is a string, and I’ve learned the hard way that when you print something, you have to be sure there ain’t any bad characters in the text, like [ { and such, or else TCL will think you're trying to execute a command or something else. So, obviously there's one of those bad characters in the HTML, so I start searching for ways to escape every bad character in the html variable and print it, and eventually I end up with this after reading an article about how to write TCL scripts that don't choke on bad input:

1
2
3
4
5
6
7
8
9
10
11
# filt -- backslash-escape characters that Tcl treats specially.
#
# Takes a string and returns a copy in which every backslash, square bracket,
# curly brace and double quote is preceded by a backslash.
proc filt {data} {
    # Backslashes go first, so the ones inserted by the later substitutions
    # are not doubled again.
    regsub -all -- \\\\ $data \\\\\\\\ data
    # Opening and closing square brackets.
    regsub -all -- \\\[ $data \\\\\[ data
    regsub -all -- \\\] $data \\\\\] data
    # Closing and opening curly braces.
    regsub -all -- \\\} $data \\\\\} data
    regsub -all -- \\\{ $data \\\\\{ data
    # Double quotes.
    regsub -all -- \\\" $data \\\\\" data
    return $data
}
 
set html [filt [join [split [list $html]]]]

And I try posting the same link again. What does it tell me? Invalid command "en". *facepalm*

Some deep breaths and a broken keyboard later I think "okay, maybe it's not the HTML", and I search for "en" in the script. The first thing I see, on the line right above the one I've been wrestling with for the last thirty minutes, is this in the user agent for Curl: [...] Mozilla/4.73 [en] (X11; [...].

For those who don’t know, if you want to execute a command in the middle of something in TCL, just put it in brackets ([command]).

Modify the Curl code shown above to just escape the brackets, and everything works fine.

catch {exec $binary(CURL) --insecure -A "Mozilla/4.73 \[en\] (X11; U; Linux 2.2.15 i686)" "$txt"} html
set html [filt [join [split [list $html]]]]

Note to self: In mother Russia^W^W TCL, everything is a string, so putting it inside the double quotes is only so that Curl will treat it as one argument, and not what the shell equivalent would mean; ignore any weird characters between them.

Comment » | code

Tamier – Chapter 1: The Yellow Flag

December 22nd, 2009 — 6:02am
Chapter 1: The Yellow Flag
Chapter 1: The Yellow Flag

About fifty years ago the population boom really made a name for itself. The living conditions were better than anyone could have dreamed about. Synthetic food could be made from waste material. Poverty, hunger and sickness were like ancient myths. Every other year the population doubled. But soon the food production couldn’t keep up, and the cities couldn’t grow fast enough. When people started to get hungry and didn’t have a place to live, crime increased. Food prices skyrocketed and many would kill for food if they couldn’t afford it.

The world government had known for a long time that something needed to be done. The first thing they did was securing their position by erecting huge enclosed biosphere buildings with their own ecosystem. It had it all: lakes, forests, artificial sky, and only the most promising and powerful families got to live in them. This was still done while everyone’s life was as good as it could be. None complained, and none interfered.

The second thing they did was instate the GPD, the “Global Patrol Division,” in response to the increased violence and crime. They were supposed to keep the peace, quell uprisings and basically keep the people at bay for when things got worse. The GPD was supposed to be a good idea, a way to a safer society, but of course it wasn’t. It was nothing less than a huge private military organization. After a while they started to implant microchips in their employees (the employees were commonly called “GPDs”). Apparently it was “just a GPS tracker.” The GPDs didn’t have a choice; they were poor, and any other job was really hard to come by nowadays. After a while people started to realize it wasn’t just a tracker in the chip. The GPDs spent more and more time patrolling, and often slept over at one of the many company headquarters. People started to call them “obedience chips” after a while. The GPDs soon had no thoughts of their own. They only did what they were told.

The GPDs got rough, merciless and in the company’s own words, “just.” The smallest crimes were punished by death, and people started to disappear all the time. People rarely left their homes at night, and many not even during the day. No one could do anything about it. Firearms, martial arts and any other sort of combat training were strictly illegal for civilians, and the GPD had become like an occupying army of the city.

***

The looming structures reached high into the sky, stealing most of the sunlight. The rest the pollution took care of, limiting the light to a bare minimum, suited better for late evening than midday. The Yellow Flag was a smaller bar, located in a lesser-known and even less populated area than the neighboring ones. Tamier preferred it that way. Especially lately since she had caught the interest of the GPD. They probably didn’t know exactly what she looked like, or they would probably already have caught her. She knew they were on to her. She had been careless and not seen the search drone. It took her picture. From an angle, true, but her picture nonetheless.

The Yellow Flag lay on the rooftop of an old factory building which had been turned into a market square. There was as much activity on the rooftops as there was on the ground. There were just too many people. On the roofs the roads reached like a labyrinth across the different buildings. There were ladders, elevators and crisscrossing catwalks everywhere.

It was getting late, and Tamier had taken the least populated route here that she knew about. She really didn’t want to bump into any GPDs. Or anyone else either for that matter. Many people were just as dangerous to meet during the night as the GPDs were.

Tamier had just reached the outside of the bar. The dim light slipped out between the planks in the boarded up window, and muffled voices could be heard from inside. She took a last look over her shoulder to see if anyone was there, then opened the door and stepped inside. There weren’t many people inside, but every conversation stopped and every face turned towards her for a short while, until she pulled back the hood of her cloak and they could see it was her. She knew everyone in here more or less. No one used to come here anymore who hadn’t already been here before.

She glanced at the patrons, nodded towards the bartender and walked straight towards the back of the bar and entered the bartender’s personal room, closing the door behind her. She took off her cloak and laid it on a chair beside the door, and started pacing back and forth. Soon after, the bartender entered the room.

- “How did it go?” the bartender asked. He was a big, muscular man well into his forties with a steadily increasing belly size.
- “I got the package, Baol.” Tamier answered. “A drone took my picture when I was leaving. It must have been a bad picture, or else I wouldn’t be here now.” She opened her bag and brought forth a small black box and gave it to Baol.
- “The client paid well for this delivery, nice work. Be careful Tamier, I’ve got a big one coming up for you next week. Try to keep low until then.” He opened a safe and took out a thick envelope, looked inside it and threw it to Tamier. “Here’s your share. And there’s something else. Someone left a package for you yesterday. Never seen the guy before. It’s there on the desk.” He pointed towards a small brown package, not much bigger than her thumb. “See you next week, Tamier.”
- “Sure.” She put both the brown package and the envelope with money in her bag, pulled on her cloak and went out into the alley.

With a few seemingly simple jumps and a ladder later she was on another rooftop, hidden from sight by two air ducts, looking at the people on the ground a hundred meters down. This was where she used to spend her nights. The staircase to the top floor of this building had crashed long ago, so she didn’t have to look out for others coming here, unless they had the same athletic skills as Tamier. She was born on the rooftops, and raised by her brother when the GPDs had killed her parents for running an illegal delivery service. Tamier and her brother had been wanted ever since, because they were related. Of course it wasn’t easy for the GPDs to find a suspect in this town because of the huge population, but if you weren’t careful, they would find you.

Tamier and her brother had taken up their parents’ service and lived in hiding from the GPDs. It had worked out pretty well until Tamier’s brother disappeared five years ago. No one knew what happened to him, but everyone understood that it had to be the GPDs. Ever since then Tamier had cut off all her connections to anyone not strictly a fence, who handled her clients.

She pulled the cloak around her tighter, went inside and dropped into her couch, forgetting about the package she’d gotten from Baol, and soon sleep was creeping in on her.

Comment » | story

OSD RSS notification

November 25th, 2009 — 4:11pm

I’m using the Thunderbird monster on my workstation for reading RSS feeds, but I’ve always wanted to have a lightweight reader flash something on the screen when certain ‘feeds of interests’ are updated, but I didn’t want it on my workstation, in case I’m busy with some full screen application. This led me to find some way to check for RSS updates and display a notification box of some sort when a new entry was found.

What I came up with was a python script looping over RSS feeds, checking for new entries, and uses notify-send (which uses libnotify) to show a box for a while on the screen indicating a new entry was found.

I used a program I wrote a while ago called sire to determine which entries are new. It basically just saves all entries in a list and, if the entry does not exist in that list, it’s new and should be displayed. You’ll crap your pants if you’re using large feeds for a while, since this is an O(n) operation, and you’re better off using a red-black tree for checking if it exists or not (hash lists will get slow after a while, since you’ll have to resize it from time to time).

This script will run forever, checking all RSS feeds every two minutes and display new entries for one hour (if multiple entries, they stack). Keep in mind though, this was designed to only check a single feed with updates not happening that often. If you have a couple of feeds that usually updates pretty often, you might want to adjust the timeout and sleep variables to better suit your needs.

If you only want to display entries from a certain category, add the allowed ones to the accept list and uncomment the if-clause.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
#!/usr/bin/python
# Poll a list of RSS feeds forever and show a desktop notification
# (notify-send) for every entry that sire reports as new.
import feedparser, sys, commands, time, pipes

feeds = [
    "http://yourfeed.com/rss.xml",
    "http://otherfeed.com/rss.xml"
]
#accept = ['category 1', 'category 2'] # filter everything except these
level = 'critical' # critical = red
timeout = 120 # seconds a notification stays on screen
sleep = 120 # seconds to wait between two rounds of checking all feeds

def f(data):
    """Encode feed text as latin1, dropping characters latin1 cannot represent."""
    return data.encode("latin1","ignore")

while True:
    for feed in feeds:
        d = feedparser.parse(feed)
        for entry in d.entries:
            title = f(entry.title)
            # not every feed tags its entries; fall back to a generic heading
            category = f(entry.get('category', 'RSS'))

            #if category not in accept:
            #    continue

            # Titles come from the network: quote them so that spaces, quotes
            # and shell metacharacters reach the commands as one literal argument.
            retval = commands.getstatusoutput('sire -P rss -D 0 -L -a %s' % pipes.quote(title))
            # exit status 0 is treated as "entry was new" -- presumably sire
            # signals an already known entry with a non-zero status
            if retval[0] == 0:
                commands.getstatusoutput('notify-send -t %s -u %s %s %s' % \
                (str(timeout*1000), pipes.quote(level), pipes.quote(category), pipes.quote(title)))

    time.sleep(sleep)

P.S.
If you don’t want to implement your own, better way of determining if an entry is new or not, you may fetch sire from svn:

svn co svn://eldslott.org/python/sire/

1 comment » | code

Tiny obfuscated digital clock howto

August 28th, 2009 — 6:03pm

Me and a friend competed to make the shortest python script that printed big ascii numbers from numbers given as argument to the script.

The only rules were: not compressed or self executed, and the output numbers should look like this (number eight):

####
#  #
####
#  #
####

So when running “python clock.py 1234567890“, the output would look like this:

   # #### #### #  # #### #### #### #### #### ####
   #    #    # #  # #    #       # #  # #  # #  #
   # #### #### #### #### ####    # #### #### #  #
   # #       #    #    # #  #    # #  #    # #  #
   # #### ####    # #### ####    # #### #### ####

To begin with, there are only four different kind of lines used to represent the numbers, right? So we don’t have to keep long strings for each number, only a list for each number telling the program which line to use for that particular number.

The different kind of lines are:

0: '####'
1: '#  #'
2: '#   '
3: '   #'

So to print say, the number 9, we would use the lines (from the top) 0, 1, 0, 3 and then 0:

0: '####'
1: '#  #'
0: '####'
3: '   #'
0: '####'

So for the number 9 we have a list of those integers, [0,1,0,3,0], and likewise for the other numbers. If we then have a list of the different kinds of lines, ["####","#  #","#   ","   #"], we can then print the numbers easily. Now, say we get the number 9 as the argument for the program, then we can print the number this way:

1
2
3
4
# the four distinct row patterns the digits are built from
lines = ["####","#  #","#   ","   #"]
# which entry of lines to use for each of the five rows of the digit 9, top to bottom
nine = [0,1,0,3,0]
for num in nine:
    print lines[num]

But now we can have a lot more numbers as the argument, so we have to do this for every number. And if we extend the variable nine to include the lines for all numbers instead of just nine, we would get something like the following (n[4] is the lines for the number 4 and so on).

1
2
3
4
5
6
7
8
9
10
import sys
# the four distinct row patterns the digits are built from
lines = ["####","#  #","#   ","   #"]
# n[d] lists, top to bottom, which entry of lines to use for each row of digit d
n = [
    [0,1,1,1,0], [3,3,3,3,3], [0,3,0,2,0], [0,3,0,3,0], [1,1,0,3,3],
    [0,2,0,3,0], [0,2,0,1,0], [0,3,3,3,3], [0,1,0,1,0], [0,1,0,3,0]
]
# Go row by row: every output row holds the same row of ALL digits, so the
# row loop has to be the outer one.
for row in range(5):
    for arg in sys.argv[1]:
        # arg is a one-character string, so convert it before indexing n;
        # don't use print, as it will put a newline after each number
        sys.stdout.write(lines[n[int(arg)][row]] + ' ')
    print # newline after each row of all numbers

Now we have a program that prints nice big ascii numbers, but we can make it a lot shorter with list comprehension and some small tricks. After that we change the program so that instead of printing numbers specified as the argument, we take the local time, and then we’ll have a digital clock.

The array can be shortened by making a string of it. A string is the same as a list of characters, so it makes no difference from the current array, except that we have to index it a bit different since we have a single list now instead of a list of lists. int("01030"[1]) is the same as [0,1,0,3,0][1], but we get fewer characters since we don’t need the commas. Now the n variable changes to this:

1
2
3
4
5
# before: one list of five row indices per digit
n = [
    [0,1,1,1,0], [3,3,3,3,3], [0,3,0,2,0], [0,3,0,3,0], [1,1,0,3,3],
    [0,2,0,3,0], [0,2,0,1,0], [0,3,3,3,3], [0,1,0,1,0], [0,1,0,3,0]
]
# after: the same fifty indices as a single string, five characters per digit
n = "01110333330302003030110330203002010033330101001030"

And to index it right, all we have to do is take the current number that we’re supposed to print, multiply it by five (since there are five lines, so the number would be five times the number elements from the beginning) plus the current line (0-4).

1
2
3
4
5
6
7
import sys
# the four distinct row patterns the digits are built from
lines = ["####","#  #","#   ","   #"]
# five row indices per digit, digits 0-9 back to back
n = "01110333330302003030110330203002010033330101001030"
# i is the row (0-4); the row loop is the outer one so that each output row
# holds the same row of every digit
for i in range(5):
    for arg in sys.argv[1]:
        # digit*5 skips to the digit's five characters, +i picks the row;
        # both the digit and the looked-up character are strings, hence int()
        sys.stdout.write(lines[int(n[int(arg)*5+i])] + ' ')
    print

And finally with some list comprehension we can get all this to one line. We start from the back. After one line has been printed for each number specified, there should be a newline, so we would have a '\n'.join() statement to begin with. After that, we print the lines one after the other. The list comprehension is a bit complicated, but I’ll try to explain it in the comments.

1
2
3
4
5
6
7
8
9
10
'\n'.join(                        # each row of lines should be separated with a newline
    [                             # the first comprehension, five rows of 'number-lines'
        ' '.join(                 # join each line with a space between them
            [                     # second comprehension, create a list of all line number y
                                  # for each number specified; n is a string, so the
                                  # character it yields is converted before indexing lines
                lines[ int(n[int(z) * 5 + y]) ] for z in sys.argv[1]
            ]
        ) for y in range(5)       # five lines for each number
    ]
)

If we then remove the comments, newlines and spaces we get the following nice piece of code.

1
# n is a string, so the looked-up character needs int() before it can index lines
'\n'.join([' '.join([lines[int(n[int(z)*5+y])]for z in sys.argv[1]])for y in range(5)])

And if we now replace the variable lines in that with the value of lines instead, and the same with the variable n, we get the following (included line breaks so it would fit).

1
2
3
4
import sys
print '\n'.join([' '.join([["####","#  #","#   ","   #"]\  # ... join([lines
[int("01110333330302003030110330203002010033330101001030"\ # [n
[int(z)*5+y])] for z in sys.argv[1]]) for y in range(5)])  # [int(z)*5+y]] ... ]

Lastly, this post was about a digital clock, so we import the time module and call it t, put everything in a while loop, add t.sleep(1) at the end, replace the sys.argv[1] with t.strftime("%H%M%S"), replace "####" with '#'*4 (one less character) and clear the screen. I used print '\n'*99 instead of os.system("clear") since it’s less code (and also don’t have to import the os module). This is what we get:

1
2
3
4
# Once a second: push the old output away with 99 newlines, then print the
# current time (HHMMSS) as big ascii digits, five rows of them.
import time as t
while 1:print'\n'*99+'\n'.join([' '.join([['#'*4,"#  #","#   ","   #"]\
[int("01110333330302003030110330203002010033330101001030"[int(z)*5+y])]\
for z in t.strftime("%H%M%S")])for y in range(5)])+'\n',t.sleep(1)

And we’re done. 225 characters (without the line breaks). And for those who wonder; my friend beat me on this one. We came to about the same solution, though my friend finished a lot earlier than I did.

1 comment » | code

Countdown timer with progress bar and reminder written in python with ncurses

August 15th, 2009 — 3:02pm

A while ago I saw a script of a friend of mine which reminded you when your tea was done. My friend’s script ran for a couple of minutes, then played a sound of someone telling you that the tea was done. I wanted a similar one, though with a progress bar and a visual instead of audible reminder, since I don’t have any speakers on the computer that I’d be running the script on. I know there are some nice progress bar utilities that I could’ve used, but I was more interested in writing one myself.

I started writing it in python, and every time I updated the progress bar (and the remaining time), I ran the clear command, though it became obvious after a while that it wouldn’t hold, since I’d get a ‘flickering effect’ from it. So I tried out ncurses, and it worked out pretty good. I had never used ncurses before, so I had to pull my hair for a while before I got the hang of it, and that I had to use less to see the error messages from python. There was also a nice ‘flashing’ function in ncurses which inverts the console colours for a short amount of time, which was a good visual reminder when the script was done.

If the flashing effect is active in the configuration, a non-blocking input is listening on stdin for any character to be pressed, and when it is, the program quits. If no flash is used the program quits as soon as the time runs out.

It automatically resizes the progress bar if the console width is changed.

Sample output:

 Tea ready in 3 minutes and 32 seconds.
   3.58% [====-------------------------------------------------------------------------------]

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
#!/usr/bin/python
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Author: scorch
# Email: [email protected]
# Date: 2009-08-15
# Version: 0.2
#
 
import os, time, sys, atexit, curses, traceback, select
from signal import signal, SIGTERM
from sys import exit
 
# ncurses screen; stays None until curses.initscr() is called in the
# __main__ block
stdscr = None
 
############### CONFIGURATION ###############
#
# what the progress bar should look like
#
# 1: [===========------------]
# 2: |---------->            |
# 3: [###########------------]
#
# the five entries are, in order: left edge, filled part, head of the
# filled part, unfilled part, right edge
BAR = ['[', '=', '=', '-', ']']
#BAR = ['|', '-', '>', ':', '|']
#BAR = ['[', '#', '#', '-', ']']
#
# the initial string for the top line
# "HEADER XX minutes and XX seconds."
HEADER = "Tea ready in"
#
# how long to sleep between updates (seconds)
SLEEP = 0.05
#
# time in seconds used if no time specified on command line
TIME = 220
#
# CAUTION: WILL FLASH LIKE NOTHING YOU'VE SEEN BEFORE! Turn off
# by setting it to False
#
# (if you get an epileptic seizure, don't blame me, I tried to
# warn you!)
FLASH = True
#############################################
 
def main():
    """Run the countdown, redrawing the header line and the progress bar
    on the curses screen until the requested time has passed.

    The duration in seconds is taken from sys.argv[1] when given, otherwise
    from TIME. Returns nothing; the result is what is drawn on stdscr.
    """
    time_passed = 0
    time_last = time.time()
    time_length = TIME
    if len(sys.argv) > 1:
        time_length = int(sys.argv[1])

    done = False
    while True:
        # dynamic width of progress bar (-12 for percentage to fit)
        bar_max_length = int(os.popen('stty size', 'r').read().split()[1])-12

        # calculate minutes and seconds left; never show a negative remainder
        # when the last sleep overshot the deadline
        time_left = max(int(time_length - time_passed), 0)
        mins, secs = divmod(time_left, 60)

        # generate the top line
        output = HEADER + ' ' + str(mins)

        if mins == 1:   output += " minute and "
        else:           output += " minutes and "

        if secs == 1:   stdscr.addstr(0, 1, output + str(secs) + " second.\n")
        else:           stdscr.addstr(0, 1, output + str(secs) + " seconds.\n")

        # calculate percentage for the bottom line; a countdown of zero
        # seconds is complete from the start
        if time_length == 0:
            percent = 100
        else:
            percent = float(time_passed)/float(time_length)*100
        if percent > 100 or done:
            percent = 100

        # right align percentage
        output = '  '
        if   percent == 100: output = ''
        elif percent >=  10: output = ' '
        output += '%.2f%% ' % percent

        # always recount the length of the bar; user might have resized window
        bar_length = percent * (bar_max_length/100.0)
        if bar_length > bar_max_length:
            bar_length = bar_max_length

        # generate the progress bar and print it together with the percentage
        output += gen_progress_bar(bar_length, bar_max_length)
        stdscr.addstr(1, 1, output)

        # time_delta is how long since we were here the last time
        time_delta = time.time() - time_last
        time_passed += float('%.4f' % time_delta)
        time_last = time.time()

        # nothing is actually shown until now
        stdscr.refresh()

        # if we're done
        if done:
            break
        # loop once more so we get 0 seconds left in the end
        if time_passed >= time_length:
            done = True

        else:
            time.sleep(SLEEP)
 
# generate the progress bar that will later be printed
def gen_progress_bar(bar_length, bar_max_length):
    """Return the progress bar as a string built from the BAR characters.

    bar_length is how much of the bar is filled and bar_max_length its full
    width; both are truncated to integers.
    """
    filled = int(bar_length)
    remaining = abs(filled - int(bar_max_length))
    # edge, filled part, head, unfilled part, edge
    pieces = [BAR[0], BAR[1] * filled, BAR[2], BAR[3] * remaining, BAR[4]]
    return ''.join(pieces)
 
# pack it up boys, it's time to go home
def cleanup():
    """Put the terminal back into its normal mode (registered with atexit)."""
    # curses was never started, e.g. initscr() failed: nothing to undo
    if stdscr is None:
        return
    stdscr.keypad(0)
    curses.echo()
    curses.nocbreak()
    curses.endwin()
    # no sys.exit() here: this already runs during interpreter exit, and
    # exiting again from an exit handler can replace the real exit status
 
def print_usage():
    """Write the two-line usage text to stdout."""
    usage = (
        "usage: tea [time in seconds]",
        "    --help, -h    shows this help",
    )
    for text in usage:
        sys.stdout.write(text + "\n")
 
def is_data():
    """Return True when stdin is ready for reading, without blocking."""
    # a timeout of 0 makes select() poll instead of wait
    readable, writable, exceptional = select.select([sys.stdin], [], [], 0)
    return readable == [sys.stdin] and not writable and not exceptional
 
if __name__ == "__main__":
    if len(sys.argv) > 1 and not sys.argv[1].isdigit():
        print_usage()
        sys.exit(0)

    # Normal exit when killed. Installed before anything else runs so that it
    # is in effect during the countdown; exit() raises SystemExit, which lets
    # the atexit cleanup restore the terminal.
    signal(SIGTERM, lambda signum, stack_frame: exit(1))

    try:
        # Initialize curses
        stdscr = curses.initscr()
        # only register the cleanup once there is a screen to clean up
        atexit.register(cleanup)
        curses.noecho()
        curses.cbreak()
        stdscr.keypad(1)
        main()

    # not a bare except: SystemExit (SIGTERM) and KeyboardInterrupt (Ctrl-C)
    # must end the program instead of being reported and ignored
    except Exception:
        traceback.print_exc()

    else:
        # the countdown finished normally:
        # non blocking input, any key stops the program
        while FLASH:
            if is_data():
                break

            curses.flash()
            # not so intense flashing
            time.sleep(0.9)

Code is also available on svn:

svn co svn://eldslott.org/python/tea/ ./tea/

5 comments » | code

Tcl script for IRC bots to grab title from posted http(s) links

June 13th, 2009 — 10:53pm

I’ve been searching for a script like this for a while, but never found one, so I wrote my own instead. I use it with Eggdrop, but it should work with EnergyMech 3.x, which also supports Tcl scripting. I don’t know if there are other IRC bots that support Tcl scripts though.

What it does is that when someone posts a http or https link in a channel with mode +title on it (“.chanset <channel> +title” in eggdrop), the script invokes curl and uses regex to extract the text in the <title> tags and sends it to the channel. If no title is available, nothing is sent.

It’s quite simple. First we get the whole line that starts with either http:// or https://, then we skip everything after the first space, and discard any fragment that might be in the link (http://example.com/index.html#info):

1
2
# keep only the first space-separated word of the line, i.e. the URL
set txt [lindex [split $txt " "] 0]
# drop everything from the first "#" on
set txt [lindex [split $txt "#"] 0]

Now that we have the clean URL, we check the permission on the channel (if it has the +title flag), and just return silently if it doesn’t.

1
2
3
4
# look up whether the script is enabled for this channel
set permission_result [channel_check_title $chan]
# return silently when it is not
if {$permission_result == 0} {
    return
}

Next is the flood control. This just makes sure that the bot doesn’t flood the channel if many people post a lot of links in a short time. After that we invoke curl to get our page (--insecure only skips SSL certificate verification).

1
# catch stores curl's output (or the error message, if exec fails) in html
catch {exec $binary(CURL) --insecure "$txt"} html

HTML entities are replaced using a very short perl script, using a small perl interpreter so we can write inline perl code (from the Tcl wiki).

1
2
3
4
# execute perl code
#
# script: perl source; command substitutions ([...]) inside it are evaluated
#         in the caller's scope before the code is handed to perl on stdin
# returns what exec returns, i.e. the output of the perl script
proc perl_interpret script {
    global binary
    # honour the configured interpreter, fall back to a PATH lookup
    if {[info exists binary(PERL)]} {
        set perl $binary(PERL)
    } else {
        set perl perl
    }
    return [exec $perl << [uplevel [list subst -novariables -noback $script]]]
}

Then we check if there is a title tag using regex, and if there is one, we extract the title and run our perl code to replace HTML-entities.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# get title; the tag may be written in any case and may carry attributes
if {[regexp -nocase {<title(?:\s[^>]*)?>([^<]+)} $html -> title]} {
    # use perl to decode html entities; the title comes from a remote page, so
    # it is handed over through the environment instead of being pasted into
    # the perl source, where it could inject code
    set ::env(HTTP_TITLE) $title
    set title [perl_interpret {
        use HTML::Entities;
        print decode_entities($ENV{HTTP_TITLE});
    }]
    unset ::env(HTTP_TITLE)

    # replace multiple spaces and newlines; no subst on the result, it would
    # execute any [command] contained in the title
    regsub -all {\n} $title " " title
    regsub -all {[ ]{2,}} $title " " title
}

Example output:

22:49:50 <@scorch> http://www.youtube.com/watch?v=kcIkziqI29I
22:49:52 < ^T-1000> Title: YouTube - Terminator Salvation: The Machinima Series (Ep. 4 Preview) [HD]

The script:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# location of curl
set binary(CURL) "/usr/bin/curl"
# location of perl
set binary(PERL) "/usr/bin/perl"
#http connection timeout (milliseconds)
set title_timeout "25000"
#flood-control (1=enabled  0=disabled)
set queue_enabled 1
#max requests
set queue_size 5
#per ? seconds
set queue_time 10
# use or not the debugger (1=enable debug  0=disable debug)
set TITLE_DEBUG 1
# if set to 1, will print in channel (PRIVMSG), 0 will notice user who issued the trigger
set PUB_OR_NOT 1
 
# only run the script on lines that start with the link
#
# The following works:
# http://example.com
# https://example.com/folder/
# http://example.com/page.html <-- great page
#
# These do not:
# look at this page: http://example.com/
set trigger "% http*://*"
 
# call pubm:title_proc for every public line matching the trigger mask
bind pubm - $trigger pubm:title_proc
 
# channel flags needed for the script to be enabled in that channel
setudef flag title
 
# check if the channel has the +title flag set
#
# chan: name of the channel to look up
# returns 1 when "+title" is among the channel's settings, otherwise 0
proc channel_check_title {chan} {
    # default to "not enabled" so the result is defined even when no
    # setting matches
    set permission 0
    foreach setting [channel info $chan] {
        if {[string equal "+title" $setting]} {
            set permission 1
            break
        }
    }
    return $permission
}
 
# execute perl code
#
# script: perl source; command substitutions ([...]) inside it are evaluated
#         in the caller's scope before the code is handed to perl on stdin
# returns what exec returns, i.e. the output of the perl script
proc perl_interpret script {
    global binary
    # honour the configured interpreter, fall back to a PATH lookup
    if {[info exists binary(PERL)]} {
        set perl $binary(PERL)
    } else {
        set perl perl
    }
    return [exec $perl << [uplevel [list subst -novariables -noback $script]]]
}
 
# flood-control timer callback: frees one request slot again
proc title:decr_inst {} {
    global instance
    if {[info exists instance] && $instance > 0} {
        incr instance -1
    }
}

# flood-control timer callback: lets the flood warning be sent again
proc title:reset_warn {} {
    global warn_msg
    set warn_msg 0
}

# pubm handler: fetch the page behind a posted link and announce its <title>
#
# nick, uhost, handle: who posted the line (only nick is used)
# chan: channel the line was posted in; nothing happens unless it has +title
# txt: the posted line, which starts with the URL
proc pubm:title_proc {nick uhost handle chan txt} {
    global instance queue_size queue_time queue_enabled title  title_timeout
    global TITLE_DEBUG PUB_OR_NOT announce random warn_msg trigger binary

    # the flood-control state is not part of the configuration section, so
    # create it on first use
    if {![info exists instance]} { set instance 0 }
    if {![info exists warn_msg]} { set warn_msg 0 }

    # keep only the URL: first word of the line, without any #fragment
    set txt [lindex [split $txt " "] 0]
    set txt [lindex [split $txt "#"] 0]
    set title "N/A"

    set permission_result [channel_check_title $chan]
    if {$permission_result == 0} {
        return
    }

    set toput "NOTICE $nick"
    if {$PUB_OR_NOT == 1} {
        set toput "PRIVMSG $chan"
    }

    #flood-control
    if {$queue_enabled == 1} {
       #flooded?
       if {$instance >= $queue_size} {
          if {$TITLE_DEBUG == 1} {
             putlog "TITLE_DEBUG flood detected"
          }
          if {$warn_msg == 0} {
             set warn_msg 1
             putquick "$toput :Flood-Control: Request for \"$txt\" from user \"$nick\" will not be answered."
             putquick "$toput :Flood-Control: Maximum of $queue_size requests every $queue_time seconds."
             utimer 120 title:reset_warn
          }
          return
       }
       incr instance
       if {$TITLE_DEBUG == 1} {
           putlog "TITLE_DEBUG new instance == $instance"
       }
       utimer $queue_time title:decr_inst
    }

    if {$TITLE_DEBUG == 1} {
        putlog "TITLE_DEBUG url = $txt"
    }
    # fetch html: --silent keeps the progress meter out of the captured
    # output, --max-time applies the configured timeout (given in
    # milliseconds, curl wants seconds), -A sets the user agent
    set max_time [expr {int(ceil($title_timeout / 1000.0))}]
    catch {exec $binary(CURL) --silent --max-time $max_time -A "what the asdf?" --insecure $txt} html

    # get title; the tag may be written in any case and may carry attributes
    if {[regexp -nocase {<title(?:\s[^>]*)?>([^<]+)} $html -> title]} {
        # use perl to decode html entities; the title comes from a remote
        # page, so it is handed over through the environment instead of being
        # pasted into the perl source, where it could inject code
        set ::env(HTTP_TITLE) $title
        set title [perl_interpret {
            use HTML::Entities;
            binmode STDOUT, ":utf8";
            print decode_entities($ENV{HTTP_TITLE});
        }]
        unset ::env(HTTP_TITLE)

        # collapse newlines, tabs and runs of spaces into single spaces; no
        # subst on the result, it would execute any [command] in the title
        regsub -all {\s+} $title " " title
        set title [string trim $title]
        # a title made of whitespace only counts as no title
        if {$title eq ""} {
            set title "N/A"
        }
    }
    if {$TITLE_DEBUG == 1} {
        putlog "TITLE_DEBUG title == $title"
    }
    # be quiet if we don't find any title
    if {$title == "N/A"} {
        return
    }

    # send to channel/nick
    puthelp "$toput :Title: $title"
}
 
# so we see that it gets loaded by eggdrop
putlog "HTTP-Title v0.2 loaded"

You may also get the source from svn:

svn co svn://eldslott.org/tcl/http_title/http_title.tcl

2 comments » | code, Uncategorized

Back to top